Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/incubator-slider
diff --git a/app-packages/accumulo/appConfig.json b/app-packages/accumulo/appConfig.json
index 8fe9a18..6b7033e 100644
--- a/app-packages/accumulo/appConfig.json
+++ b/app-packages/accumulo/appConfig.json
@@ -8,7 +8,7 @@
"java_home": "/usr/jdk64/jdk1.7.0_45",
"package_list": "files/accumulo-${accumulo.version}-bin.tar.gz",
"site.global.app_user": "yarn",
- "site.global.app_log_dir": "${AGENT_LOG_ROOT}/app/log",
+ "site.global.app_log_dir": "${AGENT_LOG_ROOT}",
"site.global.app_pid_dir": "${AGENT_WORK_ROOT}/app/run",
"site.global.app_root": "${AGENT_WORK_ROOT}/app/install/accumulo-${accumulo.version}",
"site.global.app_install_dir": "${AGENT_WORK_ROOT}/app/install",
diff --git a/app-packages/accumulo/metainfo.xml b/app-packages/accumulo/metainfo.xml
index 4cf6c79..b1aa9de 100644
--- a/app-packages/accumulo/metainfo.xml
+++ b/app-packages/accumulo/metainfo.xml
@@ -40,7 +40,7 @@
</value>
</export>
<export>
- <name>org.apache.slider.jmx</name>
+ <name>app.jmx</name>
<value>
${site.global.monitor_protocol}://${ACCUMULO_MONITOR_HOST}:${site.accumulo-site.monitor.port.client}/xml
</value>
@@ -85,6 +85,7 @@
<name>ACCUMULO_MONITOR</name>
<category>MASTER</category>
<publishConfig>true</publishConfig>
+ <appExports>QuickLinks-app.jmx,QuickLinks-org.apache.slider.monitor</appExports>
<commandScript>
<script>scripts/accumulo_monitor.py</script>
<scriptType>PYTHON</scriptType>
diff --git a/app-packages/accumulo/pom.xml b/app-packages/accumulo/pom.xml
index 45dfd87..bcf97e9 100644
--- a/app-packages/accumulo/pom.xml
+++ b/app-packages/accumulo/pom.xml
@@ -19,7 +19,7 @@
<parent>
<groupId>org.apache.slider</groupId>
<artifactId>slider</artifactId>
- <version>0.40</version>
+ <version>0.50.0-incubating</version>
<relativePath>../../pom.xml</relativePath>
</parent>
<modelVersion>4.0.0</modelVersion>
@@ -81,6 +81,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-failsafe-plugin</artifactId>
+ <version>${maven-failsafe-plugin.version}</version>
<executions>
<execution>
<id>run-integration-tests</id>
@@ -122,14 +123,6 @@
<plugins>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
- <version>${maven-compiler-plugin.version}</version>
- <configuration>
- <compilerId>groovy-eclipse-compiler</compilerId>
- <!-- set verbose to be true if you want lots of uninteresting messages -->
- <!-- <verbose>true</verbose> -->
- <source>${project.java.src.version}</source>
- <target>${project.java.src.version}</target>
- </configuration>
<dependencies>
<dependency>
<groupId>org.codehaus.groovy</groupId>
@@ -143,15 +136,6 @@
</dependency>
</dependencies>
</plugin>
-
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-surefire-plugin</artifactId>
- <configuration>
- <!-- can't figure out how to get the surefire plugin not to pick up the ITs, so skip it entirely -->
- <skip>true</skip>
- </configuration>
- </plugin>
</plugins>
</build>
@@ -185,6 +169,12 @@
</dependency>
<dependency>
<groupId>org.apache.slider</groupId>
+ <artifactId>slider-core</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.slider</groupId>
<artifactId>slider-funtest</artifactId>
<scope>test</scope>
</dependency>
@@ -193,6 +183,11 @@
<artifactId>groovy-all</artifactId>
<scope>test</scope>
</dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ <scope>test</scope>
+ </dependency>
</dependencies>
</project>
diff --git a/app-packages/app-pkg-template/README.txt b/app-packages/app-pkg-template/README.txt
new file mode 100644
index 0000000..00dfdbc
--- /dev/null
+++ b/app-packages/app-pkg-template/README.txt
@@ -0,0 +1,34 @@
+<!---
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+How to create a Slider app package for myapp?
+
+To create the app package you will need the application tarball copied to a specific location.
+
+E.g.
+ cp ~/Downloads/myapp-1.0.0.tar package/files/
+
+Create a zip package at the root of the package (<slider enlistment>/app-packages/myapp/)
+ zip -r myapp-1.0.0.zip .
+
+Verify the content using
+ zip -Tv myapp-1.0.0.zip
+
+While appConfig.json and resources.json are not required for the package they work
+well as the default configuration for Slider apps. So it's advisable that when you
+create an application package for Slider, include sample/default resources.json and
+appConfig.json for a one-node Yarn cluster.
diff --git a/app-packages/app-pkg-template/appConfig.json b/app-packages/app-pkg-template/appConfig.json
new file mode 100644
index 0000000..a6f61f9
--- /dev/null
+++ b/app-packages/app-pkg-template/appConfig.json
@@ -0,0 +1,21 @@
+{
+ "schema": "http://example.org/specification/v2.0.0",
+ "metadata": {
+ },
+ "global": {
+ "application.def": "package/myapp-1.0.0.zip",
+ "java_home": "/usr/jdk64/jdk1.7.0_45",
+
+ "site.global.app_user": "yarn",
+ "site.global.app_root": "${AGENT_WORK_ROOT}/app/install/myapp-1.0.0",
+
+ "site.global.listen_port": "${MYAPP_COMPONENT.ALLOCATED_PORT}"
+ },
+ "components": {
+ "slider-appmaster": {
+ "jvm.heapsize": "256M"
+ },
+ "MYAPP_COMPONENT": {
+ }
+ }
+}
diff --git a/app-packages/app-pkg-template/metainfo.xml b/app-packages/app-pkg-template/metainfo.xml
new file mode 100644
index 0000000..c6e1485
--- /dev/null
+++ b/app-packages/app-pkg-template/metainfo.xml
@@ -0,0 +1,57 @@
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<metainfo>
+ <schemaVersion>2.0</schemaVersion>
+ <application>
+ <name>MyApp</name>
+ <comment>MyApp is an app that can run on YARN.</comment>
+ <version>1.0.0</version>
+ <exportedConfigs>None</exportedConfigs>
+
+ <components>
+ <component>
+ <name>MYAPP_COMPONENT</name>
+ <category>MASTER</category>
+ <exports>
+ <export>
+ <name>host_port</name>
+ <value>${THIS_HOST}:${site.global.listen_port}</value>
+ </export>
+ </exports>
+ <commandScript>
+ <script>scripts/myapp_component.py</script>
+ <scriptType>PYTHON</scriptType>
+ </commandScript>
+ </component>
+ </components>
+
+ <osSpecifics>
+ <osSpecific>
+ <osType>any</osType>
+ <packages>
+ <package>
+ <type>tarball</type>
+ <name>files/myapp-1.0.0.tar</name>
+ </package>
+ </packages>
+ </osSpecific>
+ </osSpecifics>
+
+ </application>
+</metainfo>
diff --git a/app-packages/app-pkg-template/package/files/myapp-1.0.0.tar.REPLACE b/app-packages/app-pkg-template/package/files/myapp-1.0.0.tar.REPLACE
new file mode 100644
index 0000000..2114587
--- /dev/null
+++ b/app-packages/app-pkg-template/package/files/myapp-1.0.0.tar.REPLACE
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+Replace this file with a tarball of the myapp version you want to package.
diff --git a/app-packages/app-pkg-template/package/scripts/myapp_master.py b/app-packages/app-pkg-template/package/scripts/myapp_master.py
new file mode 100644
index 0000000..e6bc867
--- /dev/null
+++ b/app-packages/app-pkg-template/package/scripts/myapp_master.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+"""
+
+import sys
+from resource_management import *
+
+class MyApp_Component(Script):
+ def install(self, env):
+ self.install_packages(env)
+
+ def configure(self, env):
+ import params
+ env.set_params(params)
+
+ def start(self, env):
+ import params
+ env.set_params(params)
+ self.configure(env)
+
+ def stop(self, env):
+ import params
+ env.set_params(params)
+
+ def status(self, env):
+ import params
+ env.set_params(params)
+
+if __name__ == "__main__":
+ MyApp_Component().execute()
diff --git a/app-packages/app-pkg-template/package/scripts/params.py b/app-packages/app-pkg-template/package/scripts/params.py
new file mode 100644
index 0000000..e81bda0
--- /dev/null
+++ b/app-packages/app-pkg-template/package/scripts/params.py
@@ -0,0 +1,30 @@
+#!/usr/bin/env python
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+"""
+
+from resource_management import *
+
+# server configurations
+config = Script.get_config()
+
+app_root = config['configurations']['global']['app_root']
+java64_home = config['hostLevelParams']['java_home']
+app_user = config['configurations']['global']['app_user']
+port = config['configurations']['global']['listen_port']
+
diff --git a/app-packages/app-pkg-template/resources.json b/app-packages/app-pkg-template/resources.json
new file mode 100644
index 0000000..0e95879
--- /dev/null
+++ b/app-packages/app-pkg-template/resources.json
@@ -0,0 +1,16 @@
+{
+ "schema" : "http://example.org/specification/v2.0.0",
+ "metadata" : {
+ },
+ "global" : {
+ },
+ "components": {
+ "slider-appmaster": {
+ },
+ "MYAPP_COMPONENT": {
+ "yarn.role.priority": "1",
+ "yarn.component.instances": "1",
+ "yarn.memory": "256"
+ }
+ }
+}
\ No newline at end of file
diff --git a/app-packages/command-logger/application-pkg/pom.xml b/app-packages/command-logger/application-pkg/pom.xml
index 53f7fd2..18c5435 100644
--- a/app-packages/command-logger/application-pkg/pom.xml
+++ b/app-packages/command-logger/application-pkg/pom.xml
@@ -19,7 +19,7 @@
<parent>
<groupId>org.apache.slider</groupId>
<artifactId>slider</artifactId>
- <version>0.40</version>
+ <version>0.50.0-incubating</version>
<relativePath>../../../pom.xml</relativePath>
</parent>
<modelVersion>4.0.0</modelVersion>
@@ -55,11 +55,6 @@
</plugin>
<plugin>
- <artifactId>maven-compiler-plugin</artifactId>
- <version>3.0</version>
- </plugin>
-
- <plugin>
<groupId>org.apache.rat</groupId>
<artifactId>apache-rat-plugin</artifactId>
<version>${apache-rat-plugin.version}</version>
diff --git a/app-packages/command-logger/slider-pkg/pom.xml b/app-packages/command-logger/slider-pkg/pom.xml
index 0971868..2bb19b8 100644
--- a/app-packages/command-logger/slider-pkg/pom.xml
+++ b/app-packages/command-logger/slider-pkg/pom.xml
@@ -20,7 +20,7 @@
<parent>
<groupId>org.apache.slider</groupId>
<artifactId>slider</artifactId>
- <version>0.40</version>
+ <version>0.50.0-incubating</version>
<relativePath>../../../pom.xml</relativePath>
</parent>
<modelVersion>4.0.0</modelVersion>
diff --git a/app-packages/hbase/README.txt b/app-packages/hbase/README.txt
index b4e4ccd..1d5c4bb 100644
--- a/app-packages/hbase/README.txt
+++ b/app-packages/hbase/README.txt
@@ -28,14 +28,14 @@
****** OPTION - I (use mvn command) **
You need the HBase version available on local maven repo to create the Slider App Package for HBase.
-The version of HBase used for the app package can be adjusted by adding a
-flag such as
- -Dhbase.version=0.98.3
-
Download the tarball for HBase:
e.g. path to tarball ~/Downloads/hbase-0.98.3-hadoop2-bin.tar.gz
-Use the following command to install HBase tarball locally:
+The version of HBase used for the app package can be adjusted by adding a
+flag such as
+ -Dhbase.version=0.98.3-hadoop2
+
+Use the following command to install the HBase tarball locally (under the local workspace of the HBase repo):
mvn install:install-file -Dfile=<path-to-tarball> -DgroupId=org.apache.hbase -DartifactId=hbase -Dversion=0.98.3-hadoop2 -Dclassifier=bin -Dpackaging=tar.gz
You may need to copy the hbase tarball to the following location if the above step doesn't publish the tarball:
diff --git a/app-packages/hbase/appConfig.json b/app-packages/hbase/appConfig.json
index 20cd436..d00ae6d 100644
--- a/app-packages/hbase/appConfig.json
+++ b/app-packages/hbase/appConfig.json
@@ -9,7 +9,7 @@
"java_home": "/usr/jdk64/jdk1.7.0_45",
"package_list": "files/hbase-${hbase.version}-bin.tar.gz",
"site.global.app_user": "yarn",
- "site.global.app_log_dir": "${AGENT_LOG_ROOT}/app/log",
+ "site.global.app_log_dir": "${AGENT_LOG_ROOT}",
"site.global.app_pid_dir": "${AGENT_WORK_ROOT}/app/run",
"site.global.app_root": "${AGENT_WORK_ROOT}/app/install/hbase-${hbase.version}",
"site.global.app_install_dir": "${AGENT_WORK_ROOT}/app/install",
@@ -23,6 +23,9 @@
"site.global.ganglia_server_host": "${NN_HOST}",
"site.global.ganglia_server_port": "8667",
"site.global.ganglia_server_id": "Application1",
+ "site.global.hbase_thrift_port": "${HBASE_THRIFT.ALLOCATED_PORT}",
+ "site.global.hbase_thrift2_port": "${HBASE_THRIFT2.ALLOCATED_PORT}",
+ "site.global.hbase_rest_port": "${HBASE_REST.ALLOCATED_PORT}",
"site.hbase-site.hbase.hstore.flush.retries.number": "120",
"site.hbase-site.hbase.client.keyvalue.maxsize": "10485760",
"site.hbase-site.hbase.hstore.compactionThreshold": "3",
diff --git a/app-packages/hbase/get-hbase-site.py b/app-packages/hbase/get-hbase-site.py
new file mode 100644
index 0000000..9760781
--- /dev/null
+++ b/app-packages/hbase/get-hbase-site.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+"""Gets hbase-site.xml from running HBase instance
+First argument is the name of cluster instance
+"""
+
+import sys
+import urllib2
+import subprocess
+
+f=subprocess.Popen("slider status "+sys.argv[1], shell=True, stdout=subprocess.PIPE).stdout
+for line in f:
+ pos = line.find("info.am.web.url")
+ if pos > 0 :
+ part = line[(pos+20) :]
+ endPos = part.find("\"")
+ url = part[: (endPos-1)]
+ url = url + "/ws/v1/slider/publisher/slider/hbase-site.xml"
+ print url
+ response = urllib2.urlopen(url)
+ html = response.read()
+
+ fout=open("hbase-site.xml", "w")
+ fout.write(html)
+ fout.close()
+ f.close()
+
+ sys.exit(0)
+
+print "info.am.web.url key was not found for " + sys.argv[1]
+sys.exit(1)
diff --git a/app-packages/hbase/get-hbase-site.sh b/app-packages/hbase/get-hbase-site.sh
index 0edac30..5211d83 100755
--- a/app-packages/hbase/get-hbase-site.sh
+++ b/app-packages/hbase/get-hbase-site.sh
@@ -1,24 +1,17 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
tuple=`slider status $1 | grep "info.am.web.url"`
+echo $tuple
FS=":"
url=`echo $tuple | awk '{split($0,array,": ")} END{print array[2]}'`
url="${url%,}"
url="${url%\"}"
url="${url#\"}"
-url="${url}ws/v1/slider/publisher/slider/hbase-site.xml"
-curl -k -o hbase-site.xml $url
+siteurl="${url}ws/v1/slider/publisher/slider/hbase-site.xml"
+curl -k -o hbase-site.dnld $siteurl
+grep -v 'hbase.tmp.dir' hbase-site.dnld > hbase-site.xml
+
+linksurl="${url}ws/v1/slider/publisher/slider/quicklinks"
+curl -k -o links.json $linksurl
+python $DIR/links.py
+#| sed -e 's/\/\///g' | awk 'BEGIN { FS = ":" } ; { print $2 }'
diff --git a/app-packages/hbase/links.py b/app-packages/hbase/links.py
new file mode 100644
index 0000000..19b0d91
--- /dev/null
+++ b/app-packages/hbase/links.py
@@ -0,0 +1,35 @@
+#!/usr/bin/env python
+
+'''
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+'''
+
+from __future__ import print_function
+import logging
+import json
+
+file = open("links.json")
+links = json.load(file)
+file.close()
+if links.has_key("entries"):
+ entries = links["entries"]
+ if entries.has_key("org.apache.slider.hbase.rest"):
+ print("org.apache.slider.hbase.rest : %s" % entries["org.apache.slider.hbase.rest"])
+ if entries.has_key("org.apache.slider.hbase.thrift"):
+ print("org.apache.slider.hbase.thrift : %s" % entries["org.apache.slider.hbase.thrift"])
+ if entries.has_key("org.apache.slider.hbase.thrift2"):
+ print("org.apache.slider.hbase.thrift2 : %s" % entries["org.apache.slider.hbase.thrift2"])
diff --git a/app-packages/hbase/metainfo.xml b/app-packages/hbase/metainfo.xml
index 99413f6..aae048d 100644
--- a/app-packages/hbase/metainfo.xml
+++ b/app-packages/hbase/metainfo.xml
@@ -41,11 +41,23 @@
<value>http://${HBASE_MASTER_HOST}:${site.hbase-site.hbase.master.info.port}/master-status</value>
</export>
<export>
- <name>org.apache.slider.metrics</name>
+ <name>org.apache.slider.hbase.rest</name>
+ <value>http://${HBASE_REST_HOST}:${site.global.hbase_rest_port}</value>
+ </export>
+ <export>
+ <name>org.apache.slider.hbase.thrift2</name>
+ <value>http://${HBASE_THRIFT2_HOST}:${site.global.hbase_thrift2_port}</value>
+ </export>
+ <export>
+ <name>org.apache.slider.hbase.thrift</name>
+ <value>http://${HBASE_THRIFT_HOST}:${site.global.hbase_thrift_port}</value>
+ </export>
+ <export>
+ <name>app.metrics</name>
<value>http://${site.global.ganglia_server_host}/cgi-bin/rrd.py?c=${site.global.ganglia_server_id}</value>
</export>
<export>
- <name>org.apache.slider.ganglia</name>
+ <name>app.ganglia</name>
<value>http://${site.global.ganglia_server_host}/ganglia?c=${site.global.ganglia_server_id}</value>
</export>
</exports>
@@ -56,6 +68,12 @@
<command>HBASE_REGIONSERVER-START</command>
<requires>HBASE_MASTER-STARTED</requires>
</commandOrder>
+ <commandOrder>
+ <command>HBASE_MASTER-START</command>
+ <requires>HBASE_REST-INSTALLED</requires>
+ <requires>HBASE_THRIFT-INSTALLED</requires>
+ <requires>HBASE_THRIFT2-INSTALLED</requires>
+ </commandOrder>
</commandOrders>
<components>
<component>
@@ -63,6 +81,17 @@
<category>MASTER</category>
<minInstanceCount>1</minInstanceCount>
<maxInstanceCount>2</maxInstanceCount>
+ <appExports>QuickLinks-org.apache.slider.jmx,QuickLinks-org.apache.slider.monitor,QuickLinks-app.metrics,QuickLinks-app.ganglia</appExports>
+ <componentExports>
+ <componentExport>
+ <name>app.jmx</name>
+ <value>${THIS_HOST}:${site.hbase-site.hbase.master.info.port}/jmx</value>
+ </componentExport>
+ <componentExport>
+ <name>app.monitor</name>
+ <value>${THIS_HOST}:${site.hbase-site.hbase.master.info.port}/master-status</value>
+ </componentExport>
+ </componentExports>
<commandScript>
<script>scripts/hbase_master.py</script>
<scriptType>PYTHON</scriptType>
@@ -81,6 +110,39 @@
</component>
<component>
+ <name>HBASE_REST</name>
+ <category>MASTER</category>
+ <minInstanceCount>0</minInstanceCount>
+ <appExports>QuickLinks-org.apache.slider.hbase.rest</appExports>
+ <commandScript>
+ <script>scripts/hbase_rest.py</script>
+ <scriptType>PYTHON</scriptType>
+ </commandScript>
+ </component>
+
+ <component>
+ <name>HBASE_THRIFT</name>
+ <category>MASTER</category>
+ <minInstanceCount>0</minInstanceCount>
+ <appExports>QuickLinks-org.apache.slider.hbase.thrift</appExports>
+ <commandScript>
+ <script>scripts/hbase_thrift.py</script>
+ <scriptType>PYTHON</scriptType>
+ </commandScript>
+ </component>
+
+ <component>
+ <name>HBASE_THRIFT2</name>
+ <category>MASTER</category>
+ <minInstanceCount>0</minInstanceCount>
+ <appExports>QuickLinks-org.apache.slider.hbase.thrift2</appExports>
+ <commandScript>
+ <script>scripts/hbase_thrift2.py</script>
+ <scriptType>PYTHON</scriptType>
+ </commandScript>
+ </component>
+
+ <component>
<name>HBASE_CLIENT</name>
<category>CLIENT</category>
<minInstanceCount>0</minInstanceCount>
diff --git a/app-packages/hbase/package/scripts/hbase_regionserver.py b/app-packages/hbase/package/scripts/hbase_regionserver.py
index 8d66dcc..daa5732 100644
--- a/app-packages/hbase/package/scripts/hbase_regionserver.py
+++ b/app-packages/hbase/package/scripts/hbase_regionserver.py
@@ -58,9 +58,6 @@
pid_file = format("{pid_dir}/hbase-{hbase_user}-regionserver.pid")
check_process_status(pid_file)
- def decommission(self, env):
- print "Decommission not yet implemented!"
-
if __name__ == "__main__":
HbaseRegionServer().execute()
diff --git a/app-packages/hbase/package/scripts/hbase_rest.py b/app-packages/hbase/package/scripts/hbase_rest.py
new file mode 100644
index 0000000..36b51f9
--- /dev/null
+++ b/app-packages/hbase/package/scripts/hbase_rest.py
@@ -0,0 +1,62 @@
+#!/usr/bin/env python
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+"""
+
+import sys
+from resource_management import *
+
+from hbase import hbase
+from hbase_service import hbase_service
+
+
+class HbaseRest(Script):
+ def install(self, env):
+ self.install_packages(env)
+
+ def configure(self, env):
+ import params
+ env.set_params(params)
+
+ hbase(name='rest')
+
+ def start(self, env):
+ import params
+ env.set_params(params)
+ self.configure(env) # for security
+
+ hbase_service( 'rest',
+ action = 'start'
+ )
+
+ def stop(self, env):
+ import params
+ env.set_params(params)
+
+ hbase_service( 'rest',
+ action = 'stop'
+ )
+
+ def status(self, env):
+ import status_params
+ env.set_params(status_params)
+ pid_file = format("{pid_dir}/hbase-{hbase_user}-rest.pid")
+ check_process_status(pid_file)
+
+if __name__ == "__main__":
+ HbaseRest().execute()
diff --git a/app-packages/hbase/package/scripts/hbase_service.py b/app-packages/hbase/package/scripts/hbase_service.py
index 2b30083..96add84 100644
--- a/app-packages/hbase/package/scripts/hbase_service.py
+++ b/app-packages/hbase/package/scripts/hbase_service.py
@@ -35,6 +35,12 @@
if action == 'start':
daemon_cmd = format("{cmd} start {role}")
+ if name == 'rest':
+ daemon_cmd = format("{daemon_cmd} -p {rest_port}")
+ elif name == 'thrift':
+ daemon_cmd = format("{daemon_cmd} -p {thrift_port}")
+ elif name == 'thrift2':
+ daemon_cmd = format("{daemon_cmd} -p {thrift2_port}")
no_op_test = format("ls {pid_file} >/dev/null 2>&1 && ps `cat {pid_file}` >/dev/null 2>&1")
elif action == 'stop':
daemon_cmd = format("{cmd} stop {role} && rm -f {pid_file}")
diff --git a/app-packages/hbase/package/scripts/hbase_thrift.py b/app-packages/hbase/package/scripts/hbase_thrift.py
new file mode 100644
index 0000000..84bfc62
--- /dev/null
+++ b/app-packages/hbase/package/scripts/hbase_thrift.py
@@ -0,0 +1,62 @@
+#!/usr/bin/env python
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+"""
+
+import sys
+from resource_management import *
+
+from hbase import hbase
+from hbase_service import hbase_service
+
+
+class HbaseThrift(Script):
+ def install(self, env):
+ self.install_packages(env)
+
+ def configure(self, env):
+ import params
+ env.set_params(params)
+
+ hbase(name='thrift')
+
+ def start(self, env):
+ import params
+ env.set_params(params)
+ self.configure(env) # for security
+
+ hbase_service( 'thrift',
+ action = 'start'
+ )
+
+ def stop(self, env):
+ import params
+ env.set_params(params)
+
+ hbase_service( 'thrift',
+ action = 'stop'
+ )
+
+ def status(self, env):
+ import status_params
+ env.set_params(status_params)
+ pid_file = format("{pid_dir}/hbase-{hbase_user}-thrift.pid")
+ check_process_status(pid_file)
+
+if __name__ == "__main__":
+ HbaseThrift().execute()
diff --git a/app-packages/hbase/package/scripts/hbase_thrift2.py b/app-packages/hbase/package/scripts/hbase_thrift2.py
new file mode 100644
index 0000000..b72196c
--- /dev/null
+++ b/app-packages/hbase/package/scripts/hbase_thrift2.py
@@ -0,0 +1,62 @@
+#!/usr/bin/env python
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+"""
+
+import sys
+from resource_management import *
+
+from hbase import hbase
+from hbase_service import hbase_service
+
+
+class HbaseThrift2(Script):
+ def install(self, env):
+ self.install_packages(env)
+
+ def configure(self, env):
+ import params
+ env.set_params(params)
+
+ hbase(name='thrift2')
+
+ def start(self, env):
+ import params
+ env.set_params(params)
+ self.configure(env) # for security
+
+ hbase_service( 'thrift2',
+ action = 'start'
+ )
+
+ def stop(self, env):
+ import params
+ env.set_params(params)
+
+ hbase_service( 'thrift2',
+ action = 'stop'
+ )
+
+ def status(self, env):
+ import status_params
+ env.set_params(status_params)
+ pid_file = format("{pid_dir}/hbase-{hbase_user}-thrift2.pid")
+ check_process_status(pid_file)
+
+if __name__ == "__main__":
+ HbaseThrift2().execute()
diff --git a/app-packages/hbase/package/scripts/params.py b/app-packages/hbase/package/scripts/params.py
index 0d8b04b..1f25f68 100644
--- a/app-packages/hbase/package/scripts/params.py
+++ b/app-packages/hbase/package/scripts/params.py
@@ -57,6 +57,10 @@
ganglia_server_host = default('/configurations/global/ganglia_server_host', '')
ganglia_server_port = default('/configurations/global/ganglia_server_port', '8663')
+rest_port = config['configurations']['global']['hbase_rest_port']
+thrift_port = config['configurations']['global']['hbase_thrift_port']
+thrift2_port = config['configurations']['global']['hbase_thrift2_port']
+
if security_enabled:
_use_hostname_in_principal = default('instance_name', True)
diff --git a/app-packages/hbase/pom.xml b/app-packages/hbase/pom.xml
index 3854496..7dede6c 100644
--- a/app-packages/hbase/pom.xml
+++ b/app-packages/hbase/pom.xml
@@ -19,7 +19,7 @@
<parent>
<groupId>org.apache.slider</groupId>
<artifactId>slider</artifactId>
- <version>0.31.0-incubating-SNAPSHOT</version>
+ <version>0.41.0-incubating-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>
<modelVersion>4.0.0</modelVersion>
@@ -121,14 +121,6 @@
<plugins>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
- <version>${maven-compiler-plugin.version}</version>
- <configuration>
- <compilerId>groovy-eclipse-compiler</compilerId>
- <!-- set verbose to be true if you want lots of uninteresting messages -->
- <!-- <verbose>true</verbose> -->
- <source>${project.java.src.version}</source>
- <target>${project.java.src.version}</target>
- </configuration>
<dependencies>
<dependency>
<groupId>org.codehaus.groovy</groupId>
diff --git a/app-packages/hbase/resources.json b/app-packages/hbase/resources.json
index e0ff26f..d2fdbd8 100644
--- a/app-packages/hbase/resources.json
+++ b/app-packages/hbase/resources.json
@@ -16,6 +16,21 @@
"yarn.role.priority": "2",
"yarn.component.instances": "1",
"yarn.memory": "256"
+ },
+ "HBASE_REST": {
+ "yarn.role.priority": "3",
+ "yarn.component.instances": "1",
+ "yarn.memory": "256"
+ },
+ "HBASE_THRIFT": {
+ "yarn.role.priority": "4",
+ "yarn.component.instances": "1",
+ "yarn.memory": "256"
+ },
+ "HBASE_THRIFT2": {
+ "yarn.role.priority": "5",
+ "yarn.component.instances": "1",
+ "yarn.memory": "256"
}
}
}
diff --git a/app-packages/memcached-win/README.txt b/app-packages/memcached-win/README.txt
new file mode 100644
index 0000000..4d93b91
--- /dev/null
+++ b/app-packages/memcached-win/README.txt
@@ -0,0 +1,36 @@
+<!---
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+How to create a Slider app package for Memcached for Windows?
+
+To create the app package you will need the JMemcached jar files copied to a specific location.
+
+Replace the placeholder jar files for JMemcached.
+ cp ~/Downloads/jmemcached-cli-1.0.0.jar package/files/jmemcached-1.0.0/
+ cp ~/Downloads/jmemcached-core-1.0.0.jar package/files/jmemcached-1.0.0/
+ rm package/files/jmemcached-1.0.0/*.REPLACEME
+
+Create a zip package at the root of the package (<slider enlistment>/app-packages/memcached-win/)
+ zip -r jmemcached-1.0.0.zip .
+
+Verify the content using
+ unzip -l "$@" jmemcached-1.0.0.zip
+
+While appConfig.json and resources.json are not required for the package they work
+well as the default configuration for Slider apps. So it's advisable that when you
+create an application package for Slider, include sample/default resources.json and
+appConfig.json for a minimal Yarn cluster.
diff --git a/app-packages/memcached-win/appConfig.json b/app-packages/memcached-win/appConfig.json
new file mode 100644
index 0000000..b76ecde
--- /dev/null
+++ b/app-packages/memcached-win/appConfig.json
@@ -0,0 +1,26 @@
+{
+ "schema": "http://example.org/specification/v2.0.0",
+ "metadata": {
+ },
+ "global": {
+ "application.def": "/slider/jmemcached-1.0.0.zip",
+ "java_home": "C:\\java",
+
+ "site.global.app_user": "hadoop",
+ "site.global.app_root": "${AGENT_WORK_ROOT}\\app\\install",
+ "site.global.pid_file": "${AGENT_WORK_ROOT}\\app\\run\\component.pid",
+ "site.global.additional_cp": "C:\\hdp\\hadoop-2.4.0.2.1.3.0-1990\\share\\hadoop\\common\\lib\\*",
+ "site.global.xmx_val": "256m",
+ "site.global.xms_val": "128m",
+ "site.global.memory_val": "200M",
+ "site.global.listen_port": "${MEMCACHED.ALLOCATED_PORT}{DO_NOT_PROPAGATE}"
+
+ },
+ "components": {
+ "slider-appmaster": {
+ "jvm.heapsize": "256M"
+ },
+ "MEMCACHED": {
+ }
+ }
+}
diff --git a/app-packages/memcached-win/metainfo.xml b/app-packages/memcached-win/metainfo.xml
new file mode 100644
index 0000000..d056c0a
--- /dev/null
+++ b/app-packages/memcached-win/metainfo.xml
@@ -0,0 +1,57 @@
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<metainfo>
+ <schemaVersion>2.0</schemaVersion>
+ <application>
+ <name>MEMCACHED</name>
+ <comment>Memcache is a network accessible key/value storage system, often used as a distributed cache.</comment>
+ <version>1.0.0</version>
+ <exportedConfigs>None</exportedConfigs>
+
+ <components>
+ <component>
+ <name>MEMCACHED</name>
+ <category>MASTER</category>
+ <exports>
+ <export>
+ <name>host_port</name>
+ <value>${THIS_HOST}:${site.global.listen_port}</value>
+ </export>
+ </exports>
+ <commandScript>
+ <script>scripts/memcached.py</script>
+ <scriptType>PYTHON</scriptType>
+ </commandScript>
+ </component>
+ </components>
+
+ <osSpecifics>
+ <osSpecific>
+ <osType>any</osType>
+ <packages>
+ <package>
+ <type>folder</type>
+ <name>files\\jmemcached-1.0.0</name>
+ </package>
+ </packages>
+ </osSpecific>
+ </osSpecifics>
+
+ </application>
+</metainfo>
diff --git a/app-packages/memcached-win/package/files/jmemcached-1.0.0/jmemcached-cli-1.0.0.jar.REPLACEME b/app-packages/memcached-win/package/files/jmemcached-1.0.0/jmemcached-cli-1.0.0.jar.REPLACEME
new file mode 100644
index 0000000..6855ef9
--- /dev/null
+++ b/app-packages/memcached-win/package/files/jmemcached-1.0.0/jmemcached-cli-1.0.0.jar.REPLACEME
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+Replace with the real jar.
\ No newline at end of file
diff --git a/app-packages/memcached-win/package/files/jmemcached-1.0.0/jmemcached-core-1.0.0.jar.REPLACEME b/app-packages/memcached-win/package/files/jmemcached-1.0.0/jmemcached-core-1.0.0.jar.REPLACEME
new file mode 100644
index 0000000..6855ef9
--- /dev/null
+++ b/app-packages/memcached-win/package/files/jmemcached-1.0.0/jmemcached-core-1.0.0.jar.REPLACEME
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+Replace with the real jar.
\ No newline at end of file
diff --git a/app-packages/memcached-win/package/scripts/memcached.py b/app-packages/memcached-win/package/scripts/memcached.py
new file mode 100644
index 0000000..bc9905d
--- /dev/null
+++ b/app-packages/memcached-win/package/scripts/memcached.py
@@ -0,0 +1,57 @@
+#!/usr/bin/env python
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+"""
+
+import sys
+from resource_management import *
+
+class Memcached(Script):
+ def install(self, env):
+ self.install_packages(env)
+ pass
+
+ def configure(self, env):
+ import params
+ env.set_params(params)
+
+ def start(self, env):
+ import params
+ env.set_params(params)
+ self.configure(env)
+ process_cmd = format("{java64_home}\\bin\\java -Xmx{xmx_val} -Xms{xms_val} -classpath {app_root}\\*;{additional_cp} com.thimbleware.jmemcached.Main --memory={memory_val} --port={port}")
+
+ Execute(process_cmd,
+ user=params.app_user,
+ logoutput=False,
+ wait_for_finish=False,
+ pid_file=params.pid_file
+ )
+
+ def stop(self, env):
+ import params
+ env.set_params(params)
+
+ def status(self, env):
+ import params
+ env.set_params(params)
+ #Check process status need to be changed for Windows
+ #check_process_status(params.pid_file)
+
+if __name__ == "__main__":
+ Memcached().execute()
diff --git a/app-packages/memcached-win/package/scripts/params.py b/app-packages/memcached-win/package/scripts/params.py
new file mode 100644
index 0000000..fab3714
--- /dev/null
+++ b/app-packages/memcached-win/package/scripts/params.py
@@ -0,0 +1,34 @@
+#!/usr/bin/env python
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+"""
+
+from resource_management import *
+
+# server configurations
+config = Script.get_config()
+
+app_root = config['configurations']['global']['app_root']
+java64_home = config['hostLevelParams']['java_home']
+app_user = config['configurations']['global']['app_user']
+pid_file = config['configurations']['global']['pid_file']
+additional_cp = config['configurations']['global']['additional_cp']
+xmx_val = config['configurations']['global']['xmx_val']
+xms_val = config['configurations']['global']['xms_val']
+memory_val = config['configurations']['global']['memory_val']
+port = config['configurations']['global']['listen_port']
diff --git a/app-packages/memcached-win/resources.json b/app-packages/memcached-win/resources.json
new file mode 100644
index 0000000..f0e02ac
--- /dev/null
+++ b/app-packages/memcached-win/resources.json
@@ -0,0 +1,16 @@
+{
+ "schema" : "http://example.org/specification/v2.0.0",
+ "metadata" : {
+ },
+ "global" : {
+ },
+ "components": {
+ "slider-appmaster": {
+ },
+ "MEMCACHED": {
+ "yarn.role.priority": "1",
+ "yarn.component.instances": "1",
+ "yarn.memory": "256"
+ }
+ }
+}
\ No newline at end of file
diff --git a/app-packages/memcached/README.txt b/app-packages/memcached/README.txt
new file mode 100644
index 0000000..eed2954
--- /dev/null
+++ b/app-packages/memcached/README.txt
@@ -0,0 +1,35 @@
+<!---
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+How to create a Slider app package for Memcached?
+
+To create the app package you will need the Memcached tarball copied to a specific location.
+
+Replace the placeholder tarball for JMemcached.
+ cp ~/Downloads/jmemcached-1.0.0.tar package/files/
+ rm package/files/jmemcached-1.0.0.tar.REPLACE
+
+Create a zip package at the root of the package (<slider enlistment>/app-packages/memcached/)
+ zip -r jmemcached-1.0.0.zip .
+
+Verify the content using
+ unzip -l "$@" jmemcached-1.0.0.zip
+
+While appConfig.json and resources.json are not required for the package they work
+well as the default configuration for Slider apps. So it's advisable that when you
+create an application package for Slider, include sample/default resources.json and
+appConfig.json for a minimal Yarn cluster.
diff --git a/app-packages/memcached/appConfig.json b/app-packages/memcached/appConfig.json
new file mode 100644
index 0000000..5f32030
--- /dev/null
+++ b/app-packages/memcached/appConfig.json
@@ -0,0 +1,26 @@
+{
+ "schema": "http://example.org/specification/v2.0.0",
+ "metadata": {
+ },
+ "global": {
+ "application.def": "package/jmemcached-1.0.0.zip",
+ "java_home": "/usr/jdk64/jdk1.7.0_45",
+
+ "site.global.app_user": "yarn",
+ "site.global.app_root": "${AGENT_WORK_ROOT}/app/install/jmemcached-1.0.0",
+ "site.global.pid_file": "${AGENT_WORK_ROOT}/app/run/component.pid",
+
+ "site.global.additional_cp": "/usr/lib/hadoop/lib/*",
+ "site.global.xmx_val": "256m",
+ "site.global.xms_val": "128m",
+ "site.global.memory_val": "200M",
+ "site.global.listen_port": "${MEMCACHED.ALLOCATED_PORT}{DO_NOT_PROPAGATE}"
+ },
+ "components": {
+ "slider-appmaster": {
+ "jvm.heapsize": "256M"
+ },
+ "MEMCACHED": {
+ }
+ }
+}
diff --git a/app-packages/memcached/metainfo.xml b/app-packages/memcached/metainfo.xml
new file mode 100644
index 0000000..525816e
--- /dev/null
+++ b/app-packages/memcached/metainfo.xml
@@ -0,0 +1,57 @@
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<metainfo>
+ <schemaVersion>2.0</schemaVersion>
+ <application>
+ <name>MEMCACHED</name>
+ <comment>Memcache is a network accessible key/value storage system, often used as a distributed cache.</comment>
+ <version>1.0.0</version>
+ <exportedConfigs>None</exportedConfigs>
+
+ <components>
+ <component>
+ <name>MEMCACHED</name>
+ <category>MASTER</category>
+ <exports>
+ <export>
+ <name>host_port</name>
+ <value>${THIS_HOST}:${site.global.listen_port}</value>
+ </export>
+ </exports>
+ <commandScript>
+ <script>scripts/memcached.py</script>
+ <scriptType>PYTHON</scriptType>
+ </commandScript>
+ </component>
+ </components>
+
+ <osSpecifics>
+ <osSpecific>
+ <osType>any</osType>
+ <packages>
+ <package>
+ <type>tarball</type>
+ <name>files/jmemcached-1.0.0.tar</name>
+ </package>
+ </packages>
+ </osSpecific>
+ </osSpecifics>
+
+ </application>
+</metainfo>
diff --git a/app-packages/memcached/package/files/jmemcached-1.0.0.tar.REPLACE b/app-packages/memcached/package/files/jmemcached-1.0.0.tar.REPLACE
new file mode 100644
index 0000000..91a16d9
--- /dev/null
+++ b/app-packages/memcached/package/files/jmemcached-1.0.0.tar.REPLACE
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+Replace this file with a tarball which has the structure
+
+jmemcached-1.0.0/
+jmemcached-1.0.0/jmemcached-cli-1.0.0.jar
+jmemcached-1.0.0/jmemcached-core-1.0.0.jar
diff --git a/app-packages/memcached/package/scripts/memcached.py b/app-packages/memcached/package/scripts/memcached.py
new file mode 100644
index 0000000..6e14e86
--- /dev/null
+++ b/app-packages/memcached/package/scripts/memcached.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+"""
+
+import sys
+from resource_management import *
+
+class Memcached(Script):
+ def install(self, env):
+ self.install_packages(env)
+ pass
+
+ def configure(self, env):
+ import params
+ env.set_params(params)
+
+ def start(self, env):
+ import params
+ env.set_params(params)
+ self.configure(env)
+ process_cmd = format("{java64_home}/bin/java -Xmx{xmx_val} -Xms{xms_val} -classpath {app_root}/*:{additional_cp} com.thimbleware.jmemcached.Main --memory={memory_val} --port={port}")
+
+ Execute(process_cmd,
+ user=params.app_user,
+ logoutput=False,
+ wait_for_finish=False,
+ pid_file=params.pid_file
+ )
+
+ def stop(self, env):
+ import params
+ env.set_params(params)
+
+ def status(self, env):
+ import params
+ env.set_params(params)
+ check_process_status(params.pid_file)
+
+if __name__ == "__main__":
+ Memcached().execute()
diff --git a/app-packages/memcached/package/scripts/params.py b/app-packages/memcached/package/scripts/params.py
new file mode 100644
index 0000000..25b4055
--- /dev/null
+++ b/app-packages/memcached/package/scripts/params.py
@@ -0,0 +1,35 @@
+#!/usr/bin/env python
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+"""
+
+from resource_management import *
+
+# server configurations
+config = Script.get_config()
+
+app_root = config['configurations']['global']['app_root']
+java64_home = config['hostLevelParams']['java_home']
+app_user = config['configurations']['global']['app_user']
+pid_file = config['configurations']['global']['pid_file']
+
+additional_cp = config['configurations']['global']['additional_cp']
+xmx_val = config['configurations']['global']['xmx_val']
+xms_val = config['configurations']['global']['xms_val']
+memory_val = config['configurations']['global']['memory_val']
+port = config['configurations']['global']['listen_port']
diff --git a/app-packages/memcached/resources.json b/app-packages/memcached/resources.json
new file mode 100644
index 0000000..f0e02ac
--- /dev/null
+++ b/app-packages/memcached/resources.json
@@ -0,0 +1,16 @@
+{
+ "schema" : "http://example.org/specification/v2.0.0",
+ "metadata" : {
+ },
+ "global" : {
+ },
+ "components": {
+ "slider-appmaster": {
+ },
+ "MEMCACHED": {
+ "yarn.role.priority": "1",
+ "yarn.component.instances": "1",
+ "yarn.memory": "256"
+ }
+ }
+}
\ No newline at end of file
diff --git a/app-packages/storm/appConfig.json b/app-packages/storm/appConfig.json
index 6d6aa3a..24078cf 100644
--- a/app-packages/storm/appConfig.json
+++ b/app-packages/storm/appConfig.json
@@ -3,7 +3,7 @@
"metadata": {
},
"global": {
- "application.def": "/slider/storm_v091.zip",
+ "application.def": "package/storm_v091.zip",
"config_types": "storm-site",
"java_home": "/usr/jdk64/jdk1.7.0_45",
"package_list": "files/apache-storm-0.9.1.2.1.1.0-237.tar.gz",
@@ -68,7 +68,7 @@
"site.storm-site.logviewer.appender.name": "A1",
"site.storm-site.nimbus.host": "${NIMBUS_HOST}",
"site.storm-site.ui.port": "${STORM_UI_SERVER.ALLOCATED_PORT}",
- "site.storm-site.supervisor.slots.ports": "[${SUPERVISOR.ALLOCATED_PORT}]",
+ "site.storm-site.supervisor.slots.ports": "[${SUPERVISOR.ALLOCATED_PORT}{DO_NOT_PROPAGATE},${SUPERVISOR.ALLOCATED_PORT}{DO_NOT_PROPAGATE}]",
"site.storm-site.nimbus.file.copy.expiration.secs": "600",
"site.storm-site.supervisor.monitor.frequency.secs": "3",
"site.storm-site.transactional.zookeeper.servers": "null",
@@ -105,7 +105,7 @@
"site.storm-site.topology.trident.batch.emit.interval.millis": "500",
"site.storm-site.topology.builtin.metrics.bucket.size.secs": "60",
"site.storm-site.storm.thrift.transport": "backtype.storm.security.auth.SimpleTransportPlugin",
- "site.storm-site.logviewer.port": "0",
+ "site.storm-site.logviewer.port": "${SUPERVISOR.ALLOCATED_PORT}{DO_NOT_PROPAGATE}",
"site.storm-site.topology.debug": "false"
},
"components": {
diff --git a/app-packages/storm/metainfo.xml b/app-packages/storm/metainfo.xml
index 7edd794..dbe8549 100644
--- a/app-packages/storm/metainfo.xml
+++ b/app-packages/storm/metainfo.xml
@@ -22,27 +22,32 @@
<name>STORM</name>
<comment>Apache Hadoop Stream processing framework</comment>
<version>0.9.1.2.1</version>
+ <exportedConfigs>storm-site</exportedConfigs>
<exportGroups>
<exportGroup>
<name>QuickLinks</name>
<exports>
<export>
- <name>org.apache.slider.jmx</name>
+ <name>app.jmx</name>
<value>http://${STORM_REST_API_HOST}:${site.global.rest_api_port}/api/cluster/summary</value>
</export>
<export>
- <name>org.apache.slider.monitor</name>
+ <name>app.monitor</name>
<value>http://${STORM_UI_SERVER_HOST}:${site.storm-site.ui.port}</value>
</export>
<export>
- <name>org.apache.slider.metrics</name>
+ <name>app.metrics</name>
<value>http://${site.global.ganglia_server_host}/cgi-bin/rrd.py?c=${site.global.ganglia_server_id}</value>
</export>
<export>
- <name>org.apache.slider.ganglia</name>
+ <name>ganglia.ui</name>
<value>http://${site.global.ganglia_server_host}/ganglia?c=${site.global.ganglia_server_id}</value>
</export>
+ <export>
+ <name>nimbus.host_port</name>
+ <value>http://${NIMBUS_HOST}:${site.storm-site.nimbus.thrift.port}</value>
+ </export>
</exports>
</exportGroup>
</exportGroups>
@@ -76,6 +81,8 @@
<component>
<name>NIMBUS</name>
<category>MASTER</category>
+ <autoStartOnFailure>true</autoStartOnFailure>
+ <appExports>QuickLinks-nimbus.host_port,QuickLinks-ganglia.ui,QuickLinks-app.metrics</appExports>
<commandScript>
<script>scripts/nimbus.py</script>
<scriptType>PYTHON</scriptType>
@@ -86,6 +93,8 @@
<component>
<name>STORM_REST_API</name>
<category>MASTER</category>
+ <autoStartOnFailure>true</autoStartOnFailure>
+ <appExports>QuickLinks-app.jmx</appExports>
<commandScript>
<script>scripts/rest_api.py</script>
<scriptType>PYTHON</scriptType>
@@ -96,6 +105,13 @@
<component>
<name>SUPERVISOR</name>
<category>SLAVE</category>
+ <autoStartOnFailure>true</autoStartOnFailure>
+ <componentExports>
+ <componentExport>
+ <name>log_viewer_port</name>
+ <value>${THIS_HOST}:${site.storm-site.logviewer.port}</value>
+ </componentExport>
+ </componentExports>
<commandScript>
<script>scripts/supervisor.py</script>
<scriptType>PYTHON</scriptType>
@@ -107,6 +123,8 @@
<name>STORM_UI_SERVER</name>
<category>MASTER</category>
<publishConfig>true</publishConfig>
+ <appExports>QuickLinks-app.monitor</appExports>
+ <autoStartOnFailure>true</autoStartOnFailure>
<commandScript>
<script>scripts/ui_server.py</script>
<scriptType>PYTHON</scriptType>
@@ -117,6 +135,7 @@
<component>
<name>DRPC_SERVER</name>
<category>MASTER</category>
+ <autoStartOnFailure>true</autoStartOnFailure>
<commandScript>
<script>scripts/drpc_server.py</script>
<scriptType>PYTHON</scriptType>
@@ -136,10 +155,5 @@
</packages>
</osSpecific>
</osSpecifics>
-
- <configuration-dependencies>
- <config-type>storm-site</config-type>
- <config-type>global</config-type>
- </configuration-dependencies>
</application>
</metainfo>
diff --git a/app-packages/storm/package/scripts/service.py b/app-packages/storm/package/scripts/service.py
index 10fa5b9..13fcef2 100644
--- a/app-packages/storm/package/scripts/service.py
+++ b/app-packages/storm/package/scripts/service.py
@@ -22,7 +22,9 @@
from resource_management import *
import time
-
+"""
+Slider package uses jps as pgrep does not list the whole process start command
+"""
def service(
name,
action='start'):
@@ -30,25 +32,25 @@
import status_params
pid_file = status_params.pid_files[name]
+ container_id = status_params.container_id
no_op_test = format("ls {pid_file} >/dev/null 2>&1 && ps `cat {pid_file}` >/dev/null 2>&1")
+
jps_path = format("{java64_home}/bin/jps")
- grep_and_awk = "| grep -v grep | awk '{print $1}'"
+ grep_and_awk = format("| grep {container_id}") + " | awk '{print $1}'"
if name == 'ui':
- #process_cmd = "^java.+backtype.storm.ui.core$"
- pid_chk_cmd = format("{jps_path} -vl | grep \"^[0-9 ]*backtype.storm.ui.core\" {grep_and_awk} > {pid_file}")
+ crt_pid_cmd = format("{jps_path} -vl | grep \"^[0-9 ]*backtype.storm.ui.core\" {grep_and_awk} > {pid_file}")
elif name == "rest_api":
- process_cmd = format("{java64_home}/bin/java -jar {rest_lib_dir}/`ls {rest_lib_dir} | grep -wE storm-rest-[0-9.-]+\.jar` server")
- crt_pid_cmd = format("pgrep -f \"{process_cmd}\" && pgrep -f \"{process_cmd}\" > {pid_file}")
+ rest_process_cmd = format("{java64_home}/bin/java -jar {rest_lib_dir}/`ls {rest_lib_dir} | grep -wE storm-rest-[0-9.-]+\.jar` server")
+ crt_pid_cmd = format("pgrep -f \"{rest_process_cmd}\" > {pid_file}")
else:
- #process_cmd = format("^java.+backtype.storm.daemon.{name}$")
- pid_chk_cmd = format("{jps_path} -vl | grep \"^[0-9 ]*backtype.storm.daemon.{name}\" {grep_and_awk} > {pid_file}")
+ crt_pid_cmd = format("{jps_path} -vl | grep \"^[0-9 ]*backtype.storm.daemon.{name}\" {grep_and_awk} > {pid_file}")
if action == "start":
if name == "rest_api":
- cmd = format("{process_cmd} {rest_api_conf_file} > {log_dir}/restapi.log")
+ cmd = format("{rest_process_cmd} {rest_api_conf_file} > {log_dir}/restapi.log")
else:
- cmd = format("env JAVA_HOME={java64_home} PATH=$PATH:{java64_home}/bin STORM_BASE_DIR={app_root} STORM_CONF_DIR={conf_dir} {storm_bin} {name}")
+ cmd = format("env JAVA_HOME={java64_home} PATH=$PATH:{java64_home}/bin STORM_BASE_DIR={app_root} STORM_CONF_DIR={conf_dir} {storm_bin} {name} > {log_dir}/{name}.out 2>&1")
Execute(cmd,
not_if=no_op_test,
@@ -67,7 +69,7 @@
else:
content = None
for i in xrange(12):
- Execute(pid_chk_cmd,
+ Execute(crt_pid_cmd,
user=params.storm_user,
logoutput=True
)
diff --git a/app-packages/storm/package/scripts/status_params.py b/app-packages/storm/package/scripts/status_params.py
index eab83cf..5907446 100644
--- a/app-packages/storm/package/scripts/status_params.py
+++ b/app-packages/storm/package/scripts/status_params.py
@@ -21,6 +21,7 @@
config = Script.get_config()
+container_id = config['configurations']['global']['app_container_id']
pid_dir = config['configurations']['global']['app_pid_dir']
pid_nimbus = format("{pid_dir}/nimbus.pid")
pid_supervisor = format("{pid_dir}/supervisor.pid")
diff --git a/pom.xml b/pom.xml
index 496951a..9849cdf 100644
--- a/pom.xml
+++ b/pom.xml
@@ -19,7 +19,7 @@
<groupId>org.apache.slider</groupId>
<artifactId>slider</artifactId>
<name>Slider</name>
- <version>0.40</version>
+ <version>0.50.0-incubating</version>
<packaging>pom</packaging>
<description>
@@ -45,7 +45,6 @@
<module>slider-providers/hbase/hbase-funtests</module>
<module>slider-providers/accumulo/slider-accumulo-provider</module>
<module>slider-providers/accumulo/accumulo-funtests</module>
- <module>slider-install</module>
</modules>
<licenses>
@@ -94,11 +93,13 @@
<properties>
+ <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+ <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<!--
Java versions
-->
- <project.java.src.version>1.7</project.java.src.version>
+ <project.java.src.version>1.6</project.java.src.version>
<enforced.java.version>${project.java.src.version}</enforced.java.version>
<groovy.version>2.2.2</groovy.version>
@@ -106,6 +107,7 @@
test options
-->
+ <slider.conf.dir>${project.basedir}/src/test/clusters/offline/slider</slider.conf.dir>
<test.forkedProcessTimeoutInSeconds>18000</test.forkedProcessTimeoutInSeconds>
<test.argLine>-Xmx1024m -XX:+HeapDumpOnOutOfMemoryError</test.argLine>
<test.reuseForks>false</test.reuseForks>
@@ -116,7 +118,7 @@
<!--
core artifacts
-->
- <hadoop.version>2.4.0</hadoop.version>
+ <hadoop.version>2.4.1</hadoop.version>
<hbase.version>0.98.4-hadoop2</hbase.version>
<accumulo.version>1.6.0</accumulo.version>
@@ -142,11 +144,16 @@
<jackson.version>1.9.13</jackson.version>
<jcommander.version>1.30</jcommander.version>
+
+ <jetty.version>6.1.26</jetty.version>
<jersey.version>1.9</jersey.version>
<servlet-api.version>2.5</servlet-api.version>
<jsr311-api.version>1.1.1</jsr311-api.version>
+ <jaxb-api.version>2.2.7</jaxb-api.version>
+
<junit.version>4.11</junit.version>
<log4j.version>1.2.17</log4j.version>
+ <metrics.version>3.0.1</metrics.version>
<mockito.version>1.8.5</mockito.version>
<!-- ProtocolBuffer version, used to verify the protoc version and -->
@@ -217,9 +224,42 @@
</pluginRepositories>
<build>
-
+ <pluginManagement>
+ <plugins>
+ <plugin>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <version>${maven-compiler-plugin.version}</version>
+ <configuration>
+ <compilerId>groovy-eclipse-compiler</compilerId>
+ <source>${project.java.src.version}</source>
+ <target>${project.java.src.version}</target>
+ <!-- set verbose to be true if you want lots of uninteresting messages -->
+ <!-- <verbose>true</verbose> -->
+ </configuration>
+ </plugin>
+ </plugins>
+ </pluginManagement>
<plugins>
<plugin>
+ <artifactId>maven-enforcer-plugin</artifactId>
+ <version>${maven-enforcer-plugin.version}</version>
+ <executions>
+ <execution>
+ <id>enforce-java</id>
+ <goals>
+ <goal>enforce</goal>
+ </goals>
+ <configuration>
+ <rules>
+ <requireJavaVersion>
+ <version>[${project.java.src.version},)</version>
+ </requireJavaVersion>
+ </rules>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<version>${maven-source-plugin.version}</version>
@@ -1081,6 +1121,12 @@
</dependency>
<dependency>
+ <groupId>javax.xml.bind</groupId>
+ <artifactId>jaxb-api</artifactId>
+ <version>${jaxb-api.version}</version>
+ </dependency>
+
+ <dependency>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-client</artifactId>
<version>${jersey.version}</version>
@@ -1140,6 +1186,11 @@
<version>${jersey.version}</version>
</dependency>
+ <dependency>
+ <groupId>com.codahale.metrics</groupId>
+ <artifactId>metrics-core</artifactId>
+ <version>${metrics.version}</version>
+ </dependency>
<!-- ======================================================== -->
<!-- Mocking -->
@@ -1162,9 +1213,19 @@
<!-- ======================================================== -->
<dependency>
- <groupId>org.mortbay.jetty</groupId>
- <artifactId>jetty-sslengine</artifactId>
- <version>6.1.26</version>
+ <groupId>org.mortbay.jetty</groupId>
+ <artifactId>jetty</artifactId>
+ <version>${jetty.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.mortbay.jetty</groupId>
+ <artifactId>jetty-util</artifactId>
+ <version>${jetty.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.mortbay.jetty</groupId>
+ <artifactId>jetty-sslengine</artifactId>
+ <version>${jetty.version}</version>
</dependency>
</dependencies>
@@ -1230,7 +1291,7 @@
<!-- hadoop branch-2 builds -->
<id>branch-2</id>
<properties>
- <hadoop.version>2.5.0-SNAPSHOT</hadoop.version>
+ <hadoop.version>2.6.0-SNAPSHOT</hadoop.version>
</properties>
</profile>
diff --git a/slider-agent/conf/agent.ini b/slider-agent/conf/agent.ini
index b52bec9..7b9d57d 100644
--- a/slider-agent/conf/agent.ini
+++ b/slider-agent/conf/agent.ini
@@ -28,11 +28,11 @@
app_dbg_cmd=
debug_mode_enabled=true
-app_task_dir=app/command-log
-app_log_dir=app/log
+app_task_dir=.
+app_log_dir=.
app_tmp_dir=app/tmp
-log_dir=infra/log
+log_dir=.
run_dir=infra/run
version_file=infra/version
diff --git a/slider-agent/pom.xml b/slider-agent/pom.xml
index 7a3b447..d670f81 100644
--- a/slider-agent/pom.xml
+++ b/slider-agent/pom.xml
@@ -19,7 +19,7 @@
<parent>
<groupId>org.apache.slider</groupId>
<artifactId>slider</artifactId>
- <version>0.40</version>
+ <version>0.50.0-incubating</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>slider-agent</artifactId>
@@ -57,10 +57,6 @@
</plugin>
<plugin>
- <artifactId>maven-compiler-plugin</artifactId>
- <version>3.0</version>
- </plugin>
- <plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
<version>1.2</version>
@@ -73,7 +69,7 @@
<argument>unitTests.py</argument>
</arguments>
<environmentVariables>
- <PYTHONPATH>${project.basedir}/src/main/python/jinja2:${project.basedir}/src/test/python:${project.basedir}/src/main/python:${project.basedir}/src/main/python/agent:${project.basedir}/src/main/python/resource_management:${project.basedir}/src/test/python/agent:${project.basedir}/src/test/python/resource_management</PYTHONPATH>
+ <PYTHONPATH>${project.basedir}/src/main/python/jinja2:${project.basedir}/src/test/python:${project.basedir}/src/main/python:${project.basedir}/src/main/python/agent:${project.basedir}/src/main/python/resource_management:${project.basedir}/src/test/python/agent:${project.basedir}/src/test/python/resource_management:${project.basedir}/src/main/python/kazoo</PYTHONPATH>
</environmentVariables>
<skip>${skipTests}</skip>
</configuration>
@@ -106,6 +102,8 @@
<exclude>src/main/python/jinja2/**</exclude>
<!-- mock files (BSD license) -->
<exclude>src/test/python/mock/**</exclude>
+ <!-- kazoo files (Apache License, Version 2.0) -->
+ <exclude>src/main/python/kazoo/**</exclude>
</excludes>
</configuration>
</plugin>
diff --git a/slider-agent/src/main/python/agent/ActionQueue.py b/slider-agent/src/main/python/agent/ActionQueue.py
index d4d8bc2..4c45a76 100644
--- a/slider-agent/src/main/python/agent/ActionQueue.py
+++ b/slider-agent/src/main/python/agent/ActionQueue.py
@@ -49,6 +49,7 @@
FAILED_STATUS = 'FAILED'
STORE_APPLIED_CONFIG = 'record_config'
+ AUTO_RESTART = 'auto_restart'
def __init__(self, config, controller):
super(ActionQueue, self).__init__()
@@ -120,6 +121,10 @@
logger.debug(pprint.pformat(command))
taskId = command['taskId']
+
+ # if auto generated then do not report result
+ reportResult = CommandStatusDict.shouldReportResult(command)
+
# Preparing 'IN_PROGRESS' report
in_progress_status = self.commandStatuses.generate_report_template(command)
in_progress_status.update({
@@ -127,12 +132,19 @@
'tmperr': self.tmpdir + os.sep + 'errors-' + str(taskId) + '.txt',
'structuredOut': self.tmpdir + os.sep + 'structured-out-' + str(
taskId) + '.json',
- 'status': self.IN_PROGRESS_STATUS
+ 'status': self.IN_PROGRESS_STATUS,
+ 'reportResult': reportResult
})
- self.commandStatuses.put_command_status(command, in_progress_status)
+ self.commandStatuses.put_command_status(command, in_progress_status, reportResult)
+
store_config = False
if ActionQueue.STORE_APPLIED_CONFIG in command['commandParams']:
store_config = 'true' == command['commandParams'][ActionQueue.STORE_APPLIED_CONFIG]
+ store_command = False
+ if 'roleParams' in command and ActionQueue.AUTO_RESTART in command['roleParams']:
+ logger.info("Component has indicated auto-restart. Saving details from START command.")
+ store_command = 'true' == command['roleParams'][ActionQueue.AUTO_RESTART]
+
# running command
commandresult = self.customServiceOrchestrator.runCommand(command,
@@ -141,7 +153,7 @@
in_progress_status[
'tmperr'],
True,
- store_config)
+ store_config or store_command)
# dumping results
status = self.COMPLETED_STATUS
if commandresult[Constants.EXIT_CODE] != 0:
@@ -152,6 +164,7 @@
'stderr': commandresult['stderr'],
Constants.EXIT_CODE: commandresult[Constants.EXIT_CODE],
'status': status,
+ 'reportResult': reportResult
})
if roleResult['stdout'] == '':
roleResult['stdout'] = 'None'
@@ -170,7 +183,7 @@
roleResult['allocatedPorts'] = commandresult[Constants.ALLOCATED_PORTS]
if Constants.FOLDERS in commandresult:
roleResult['folders'] = commandresult[Constants.FOLDERS]
- self.commandStatuses.put_command_status(command, roleResult)
+ self.commandStatuses.put_command_status(command, roleResult, reportResult)
# Store action result to agent response queue
def result(self):
@@ -184,10 +197,7 @@
cluster = command['clusterName']
service = command['serviceName']
component = command['componentName']
- reportResult = True
- if 'auto_generated' in command:
- reportResult = not command['auto_generated']
-
+ reportResult = CommandStatusDict.shouldReportResult(command)
component_status = self.customServiceOrchestrator.requestComponentStatus(command)
result = {"componentName": component,
diff --git a/slider-agent/src/main/python/agent/AgentConfig.py b/slider-agent/src/main/python/agent/AgentConfig.py
index 16b924c..e45ba23 100644
--- a/slider-agent/src/main/python/agent/AgentConfig.py
+++ b/slider-agent/src/main/python/agent/AgentConfig.py
@@ -22,6 +22,7 @@
import StringIO
import os
import logging
+import posixpath
logger = logging.getLogger()
@@ -32,6 +33,8 @@
hostname=localhost
port=8440
secured_port=8441
+zk_quorum=localhost:2181
+zk_reg_path=/register/org-apache-slider/cl1
check_path=/ws/v1/slider/agents/
register_path=/ws/v1/slider/agents/{name}/register
heartbeat_path=/ws/v1/slider/agents/{name}/heartbeat
@@ -43,11 +46,11 @@
app_dbg_cmd=
debug_mode_enabled=true
-app_task_dir=app/command-log
-app_log_dir=app/log
+app_task_dir=.
+app_log_dir=.
app_tmp_dir=app/tmp
-log_dir=infra/log
+log_dir=.
run_dir=infra/run
version_file=infra/version
@@ -141,7 +144,7 @@
if name in AgentConfig.FOLDER_MAPPING and AgentConfig.FOLDER_MAPPING[
name] == "LOG":
root_folder_to_use = self.logroot
- return os.path.join(root_folder_to_use, relativePath)
+ return posixpath.join(root_folder_to_use, relativePath)
else:
return relativePath
diff --git a/slider-agent/src/main/python/agent/CommandStatusDict.py b/slider-agent/src/main/python/agent/CommandStatusDict.py
index 9261e29..bcbce9e 100644
--- a/slider-agent/src/main/python/agent/CommandStatusDict.py
+++ b/slider-agent/src/main/python/agent/CommandStatusDict.py
@@ -114,12 +114,14 @@
grep = Grep()
output = grep.tail(tmpout, Grep.OUTPUT_LAST_LINES)
inprogress = self.generate_report_template(command)
+ reportResult = CommandStatusDict.shouldReportResult(command)
inprogress.update({
'stdout': grep.filterMarkup(output),
'stderr': tmperr,
'structuredOut': tmpstructuredout,
Constants.EXIT_CODE: 777,
'status': ActionQueue.IN_PROGRESS_STATUS,
+ 'reportResult': reportResult
})
return inprogress
@@ -140,3 +142,6 @@
return stub
+ @staticmethod
+ def shouldReportResult(command):
+ return not (Constants.AUTO_GENERATED in command and command[Constants.AUTO_GENERATED])
diff --git a/slider-agent/src/main/python/agent/Constants.py b/slider-agent/src/main/python/agent/Constants.py
index 88cd564..2975266 100644
--- a/slider-agent/src/main/python/agent/Constants.py
+++ b/slider-agent/src/main/python/agent/Constants.py
@@ -29,4 +29,7 @@
AGENT_LOG_ROOT = "AGENT_LOG_ROOT"
DO_NOT_REGISTER = "DO_NOT_REGISTER"
DO_NOT_HEARTBEAT = "DO_NOT_HEARTBEAT"
-DO_NOT_HEARTBEAT_AFTER_ = "DO_NOT_HEARTBEAT_AFTER_"
\ No newline at end of file
+DO_NOT_HEARTBEAT_AFTER_ = "DO_NOT_HEARTBEAT_AFTER_"
+ZK_QUORUM="zk_quorum"
+ZK_REG_PATH="zk_reg_path"
+AUTO_GENERATED="auto_generated"
diff --git a/slider-agent/src/main/python/agent/Controller.py b/slider-agent/src/main/python/agent/Controller.py
index 92e9086..1e27efa 100644
--- a/slider-agent/src/main/python/agent/Controller.py
+++ b/slider-agent/src/main/python/agent/Controller.py
@@ -34,6 +34,7 @@
from Register import Register
from ActionQueue import ActionQueue
from NetUtil import NetUtil
+from Registry import Registry
import ssl
import ProcessHelper
import Constants
@@ -43,7 +44,12 @@
logger = logging.getLogger()
AGENT_AUTO_RESTART_EXIT_CODE = 77
+HEART_BEAT_RETRY_THRESHOLD = 2
+WS_AGENT_CONTEXT_ROOT = '/ws'
+SLIDER_PATH_AGENTS = WS_AGENT_CONTEXT_ROOT + '/v1/slider/agents/'
+SLIDER_REL_PATH_REGISTER = '/register'
+SLIDER_REL_PATH_HEARTBEAT = '/heartbeat'
class State:
INIT, INSTALLING, INSTALLED, STARTING, STARTED, FAILED = range(6)
@@ -57,13 +63,12 @@
self.safeMode = True
self.credential = None
self.config = config
- self.hostname = config.getLabel()
- server_url = 'https://' + config.get(AgentConfig.SERVER_SECTION,
- 'hostname') + \
- ':' + config.get(AgentConfig.SERVER_SECTION,
- 'secured_port')
- self.registerUrl = server_url + '/ws/v1/slider/agents/' + self.hostname + '/register'
- self.heartbeatUrl = server_url + '/ws/v1/slider/agents/' + self.hostname + '/heartbeat'
+ self.label = config.getLabel()
+ self.hostname = config.get(AgentConfig.SERVER_SECTION, 'hostname')
+ self.secured_port = config.get(AgentConfig.SERVER_SECTION, 'secured_port')
+ self.server_url = 'https://' + self.hostname + ':' + self.secured_port
+ self.registerUrl = self.server_url + SLIDER_PATH_AGENTS + self.label + SLIDER_REL_PATH_REGISTER
+ self.heartbeatUrl = self.server_url + SLIDER_PATH_AGENTS + self.label + SLIDER_REL_PATH_HEARTBEAT
self.netutil = NetUtil()
self.responseId = -1
self.repeatRegistration = False
@@ -80,6 +85,8 @@
self.componentActualState = State.INIT
self.statusCommand = None
self.failureCount = 0
+ self.heartBeatRetryCount = 0
+ self.autoRestart = False
def __del__(self):
@@ -111,7 +118,11 @@
while not self.isRegistered:
try:
- data = json.dumps(self.register.build(id))
+ data = json.dumps(self.register.build(
+ self.componentActualState,
+ self.componentExpectedState,
+ self.actionQueue.customServiceOrchestrator.allocated_ports,
+ id))
logger.info("Registering with the server at " + self.registerUrl +
" with data " + pprint.pformat(data))
response = self.sendRequest(self.registerUrl, data)
@@ -204,8 +215,8 @@
try:
if not retry:
data = json.dumps(
- self.heartbeat.build(commandResult, self.responseId,
- self.hasMappedComponents))
+ self.heartbeat.build(commandResult,
+ self.responseId, self.hasMappedComponents))
self.updateStateBasedOnResult(commandResult)
logger.debug("Sending request: " + data)
pass
@@ -218,6 +229,12 @@
serverId = int(response['responseId'])
+ restartEnabled = False
+ if 'restartEnabled' in response:
+ restartEnabled = response['restartEnabled']
+ if restartEnabled:
+ logger.info("Component auto-restart is enabled.")
+
if 'hasMappedComponents' in response.keys():
self.hasMappedComponents = response['hasMappedComponents'] != False
@@ -231,7 +248,8 @@
return
if serverId != self.responseId + 1:
- logger.error("Error in responseId sequence - restarting")
+ logger.error("Error in responseId sequence expected " + str(self.responseId + 1)
+ + " but got " + str(serverId) + " - restarting")
self.restartAgent()
else:
self.responseId = serverId
@@ -250,6 +268,19 @@
logger.info("No commands sent from the Server.")
pass
+ # Add a start command
+ if self.componentActualState == State.FAILED and \
+ self.componentExpectedState == State.STARTED and restartEnabled:
+ stored_command = self.actionQueue.customServiceOrchestrator.stored_command
+ if len(stored_command) > 0:
+ auto_start_command = self.create_start_command(stored_command)
+ if auto_start_command:
+ logger.info("Automatically adding a start command.")
+ logger.debug("Auto start command: " + pprint.pformat(auto_start_command))
+ self.updateStateBasedOnCommand([auto_start_command], False)
+ self.addToQueue([auto_start_command])
+ pass
+
# Add a status command
if (self.componentActualState != State.STARTING and \
self.componentExpectedState == State.STARTED) and \
@@ -285,9 +316,33 @@
print(
"Server certificate verify failed. Did you regenerate server certificate?")
certVerifFailed = True
+ self.heartBeatRetryCount += 1
+ logger.error(
+ "Heartbeat retry count = %d" % (self.heartBeatRetryCount))
+ # Re-read zk registry in case AM was restarted and came up with new
+ # host/port, but do this only after heartbeat retry attempts crosses
+ # threshold
+ if self.heartBeatRetryCount > HEART_BEAT_RETRY_THRESHOLD:
+ self.isRegistered = False
+ self.repeatRegistration = True
+ self.heartBeatRetryCount = 0
+ self.cachedconnect = None # Previous connection is broken now
+ zk_quorum = self.config.get(AgentConfig.SERVER_SECTION, Constants.ZK_QUORUM)
+ zk_reg_path = self.config.get(AgentConfig.SERVER_SECTION, Constants.ZK_REG_PATH)
+ registry = Registry(zk_quorum, zk_reg_path)
+ amHost, amSecuredPort = registry.readAMHostPort()
+ logger.info("Read from ZK registry: AM host = %s, AM secured port = %s" % (amHost, amSecuredPort))
+ self.hostname = amHost
+ self.secured_port = amSecuredPort
+ self.config.set(AgentConfig.SERVER_SECTION, "hostname", self.hostname)
+ self.config.set(AgentConfig.SERVER_SECTION, "secured_port", self.secured_port)
+ self.server_url = 'https://' + self.hostname + ':' + self.secured_port
+ self.registerUrl = self.server_url + SLIDER_PATH_AGENTS + self.label + SLIDER_REL_PATH_REGISTER
+ self.heartbeatUrl = self.server_url + SLIDER_PATH_AGENTS + self.label + SLIDER_REL_PATH_HEARTBEAT
+ return
self.cachedconnect = None # Previous connection is broken now
retry = True
- # Sleep for some time
+ # Sleep for some time
timeout = self.netutil.HEARTBEAT_IDDLE_INTERVAL_SEC \
- self.netutil.MINIMUM_INTERVAL_BETWEEN_HEARTBEATS
self.heartbeat_wait_event.wait(timeout=timeout)
@@ -297,13 +352,25 @@
pass
logger.info("Controller stopped heart-beating.")
- def updateStateBasedOnCommand(self, commands):
+
+ def create_start_command(self, stored_command):
+ taskId = int(stored_command['taskId'])
+ taskId = taskId + 1
+ stored_command['taskId'] = taskId
+ stored_command['commandId'] = "{0}-1".format(taskId)
+ stored_command[Constants.AUTO_GENERATED] = True
+ return stored_command
+ pass
+
+
+ def updateStateBasedOnCommand(self, commands, createStatus=True):
for command in commands:
if command["roleCommand"] == "START":
self.componentExpectedState = State.STARTED
self.componentActualState = State.STARTING
self.failureCount = 0
- self.statusCommand = self.createStatusCommand(command)
+ if createStatus:
+ self.statusCommand = self.createStatusCommand(command)
if command["roleCommand"] == "INSTALL":
self.componentExpectedState = State.INSTALLED
@@ -329,6 +396,7 @@
if "healthStatus" in commandResult:
if commandResult["healthStatus"] == "INSTALLED":
+ # Mark it FAILED as its a failure remedied by auto-start or container restart
self.componentActualState = State.FAILED
self.failureCount += 1
self.logStates()
@@ -357,9 +425,9 @@
statusCommand["hostLevelParams"] = command["hostLevelParams"]
statusCommand["serviceName"] = command["serviceName"]
statusCommand["taskId"] = "status"
- statusCommand['auto_generated'] = True
- return statusCommand
+ statusCommand[Constants.AUTO_GENERATED] = True
logger.info("Status command: " + pprint.pformat(statusCommand))
+ return statusCommand
pass
diff --git a/slider-agent/src/main/python/agent/CustomServiceOrchestrator.py b/slider-agent/src/main/python/agent/CustomServiceOrchestrator.py
index 6296033..15f1664 100644
--- a/slider-agent/src/main/python/agent/CustomServiceOrchestrator.py
+++ b/slider-agent/src/main/python/agent/CustomServiceOrchestrator.py
@@ -24,6 +24,8 @@
import pprint
import sys
import socket
+import posixpath
+import platform
from AgentConfig import AgentConfig
from AgentException import AgentException
from PythonExecutor import PythonExecutor
@@ -49,25 +51,26 @@
self.config = config
self.tmp_dir = config.getResolvedPath(AgentConfig.APP_TASK_DIR)
self.python_executor = PythonExecutor(self.tmp_dir, config)
- self.status_commands_stdout = os.path.join(self.tmp_dir,
- 'status_command_stdout.txt')
- self.status_commands_stderr = os.path.join(self.tmp_dir,
- 'status_command_stderr.txt')
+ self.status_commands_stdout = os.path.realpath(posixpath.join(self.tmp_dir,
+ 'status_command_stdout.txt'))
+ self.status_commands_stderr = os.path.realpath(posixpath.join(self.tmp_dir,
+ 'status_command_stderr.txt'))
self.public_fqdn = hostname.public_hostname()
- self.applied_configs = {}
+ self.stored_command = {}
+ self.allocated_ports = {}
# Clean up old status command files if any
try:
os.unlink(self.status_commands_stdout)
os.unlink(self.status_commands_stderr)
except OSError:
pass # Ignore fail
- self.base_dir = os.path.join(
- config.getResolvedPath(AgentConfig.APP_PACKAGE_DIR), "package")
+ self.base_dir = os.path.realpath(posixpath.join(
+ config.getResolvedPath(AgentConfig.APP_PACKAGE_DIR), "package"))
def runCommand(self, command, tmpoutfile, tmperrfile,
- override_output_files=True, store_config=False):
- allocated_port = {}
+ override_output_files=True, store_command=False):
+ allocated_ports = {}
try:
script_type = command['commandParams']['script_type']
script = command['commandParams']['script']
@@ -78,13 +81,13 @@
script_path = self.resolve_script_path(self.base_dir, script, script_type)
script_tuple = (script_path, self.base_dir)
- tmpstrucoutfile = os.path.join(self.tmp_dir,
- "structured-out-{0}.json".format(task_id))
+ tmpstrucoutfile = os.path.realpath(posixpath.join(self.tmp_dir,
+ "structured-out-{0}.json".format(task_id)))
if script_type.upper() != self.SCRIPT_TYPE_PYTHON:
# We don't support anything else yet
message = "Unknown script type {0}".format(script_type)
raise AgentException(message)
- json_path = self.dump_command_to_json(command, allocated_port, store_config)
+ json_path = self.dump_command_to_json(command, allocated_ports, store_command)
py_file_list = [script_tuple]
# filter None values
filtered_py_file_list = [i for i in py_file_list if i]
@@ -94,11 +97,15 @@
ret = None
for py_file, current_base_dir in filtered_py_file_list:
script_params = [command_name, json_path, current_base_dir]
- python_paths = [os.path.join(self.config.getWorkRootPath(),
- "infra/agent/slider-agent/jinja2"),
- os.path.join(self.config.getWorkRootPath(),
- "infra/agent/slider-agent")]
- environment_vars = [("PYTHONPATH", ":".join(python_paths))]
+ python_paths = [os.path.realpath(posixpath.join(self.config.getWorkRootPath(),
+ "infra", "agent", "slider-agent", "jinja2")),
+ os.path.realpath(posixpath.join(self.config.getWorkRootPath(),
+ "infra", "agent", "slider-agent"))]
+ if platform.system() != "Windows":
+ environment_vars = [("PYTHONPATH", ":".join(python_paths))]
+ else:
+ environment_vars = [("PYTHONPATH", ";".join(python_paths))]
+
ret = self.python_executor.run_file(py_file, script_params,
tmpoutfile, tmperrfile, timeout,
tmpstrucoutfile,
@@ -126,13 +133,14 @@
}
if Constants.EXIT_CODE in ret and ret[Constants.EXIT_CODE] == 0:
- ret[Constants.ALLOCATED_PORTS] = allocated_port
+ ret[Constants.ALLOCATED_PORTS] = allocated_ports
+ self.allocated_ports = allocated_ports
# Irrespective of the outcome report the folder paths
if command_name == 'INSTALL':
ret[Constants.FOLDERS] = {
- Constants.AGENT_LOG_ROOT : self.config.getLogPath(),
- Constants.AGENT_WORK_ROOT : self.config.getWorkRootPath()
+ Constants.AGENT_LOG_ROOT: self.config.getLogPath(),
+ Constants.AGENT_WORK_ROOT: self.config.getWorkRootPath()
}
return ret
@@ -141,29 +149,35 @@
"""
Encapsulates logic of script location determination.
"""
- path = os.path.join(base_dir, script)
+ path = os.path.realpath(posixpath.join(base_dir, script))
if not os.path.exists(path):
message = "Script {0} does not exist".format(path)
raise AgentException(message)
return path
def getConfig(self, command):
- if 'commandParams' in command and 'config_type' in command['commandParams']:
- config_type = command['commandParams']['config_type']
- logger.info("Requesting applied config for type {0}".format(config_type))
- if config_type in self.applied_configs:
- return {
- 'configurations': {config_type: self.applied_configs[config_type]}
- }
+ if 'configurations' in self.stored_command:
+ if 'commandParams' in command and 'config_type' in command['commandParams']:
+ config_type = command['commandParams']['config_type']
+ logger.info("Requesting applied config for type {0}".format(config_type))
+ if config_type in self.stored_command['configurations']:
+ return {
+ 'configurations': {config_type: self.stored_command['configurations'][config_type]}
+ }
+ else:
+ return {
+ 'configurations': {}
+ }
+ pass
else:
+ logger.info("Requesting all applied config.")
return {
- 'configurations': {}
+ 'configurations': self.stored_command['configurations']
}
pass
else:
- logger.info("Requesting all applied config.")
return {
- 'configurations': self.applied_configs
+ 'configurations': {}
}
pass
@@ -178,7 +192,7 @@
override_output_files = False
if command['roleCommand'] == "GET_CONFIG":
- return self.getConfig(command)
+ return self.getConfig(command)
else:
res = self.runCommand(command, self.status_commands_stdout,
@@ -192,7 +206,7 @@
return res
pass
- def dump_command_to_json(self, command, allocated_ports, store_config=False):
+ def dump_command_to_json(self, command, allocated_ports, store_command=False):
"""
Converts command to json file and returns file path
"""
@@ -207,15 +221,15 @@
if command_type == ActionQueue.STATUS_COMMAND:
# These files are frequently created, thats why we don't
# store them all, but only the latest one
- file_path = os.path.join(self.tmp_dir, "status_command.json")
+ file_path = os.path.realpath(posixpath.join(self.tmp_dir, "status_command.json"))
else:
task_id = command['taskId']
- file_path = os.path.join(self.tmp_dir, "command-{0}.json".format(task_id))
+ file_path = os.path.realpath(posixpath.join(self.tmp_dir, "command-{0}.json".format(task_id)))
# Json may contain passwords, that's why we need proper permissions
if os.path.isfile(file_path):
os.unlink(file_path)
- self.finalize_command(command, store_config, allocated_ports)
+ self.finalize_command(command, store_command, allocated_ports)
with os.fdopen(os.open(file_path, os.O_WRONLY | os.O_CREAT,
0600), 'w') as f:
@@ -227,12 +241,17 @@
patch content
${AGENT_WORK_ROOT} -> AgentConfig.getWorkRootPath()
${AGENT_LOG_ROOT} -> AgentConfig.getLogPath()
+ ALLOCATED_PORT is a hint to allocate port. It works as follows:
+ Its of the form {component_name.ALLOCATED_PORT}[{DEFAULT_default_port}][{DO_NOT_PROPAGATE}]
+ Either a port gets allocated or if not then just set the value to "0"
"""
- def finalize_command(self, command, store_config, allocated_ports):
+ def finalize_command(self, command, store_command, allocated_ports):
component = command['componentName']
- allocated_port_format = "${{{0}.ALLOCATED_PORT}}"
- port_allocation_req = allocated_port_format.format(component)
+ allocated_for_this_component_format = "${{{0}.ALLOCATED_PORT}}"
+ allocated_for_any = ".ALLOCATED_PORT}"
+
+ port_allocation_req = allocated_for_this_component_format.format(component)
if 'configurations' in command:
for key in command['configurations']:
if len(command['configurations'][key]) > 0:
@@ -243,10 +262,12 @@
value = value.replace("${AGENT_LOG_ROOT}",
self.config.getLogPath())
if port_allocation_req in value:
- port = self.allocate_port()
- value = value.replace(port_allocation_req, str(port))
- logger.info("Allocated port " + str(port) + " for " + port_allocation_req)
- allocated_ports[k] = value
+ value = self.allocate_ports(value, port_allocation_req)
+ allocated_ports[key + "." + k] = value
+ elif allocated_for_any in value:
+ ## All unallocated ports should be set to 0
+ logger.info("Assigning port 0 " + "for " + value)
+ value = self.set_all_unallocated_ports(value)
command['configurations'][key][k] = value
pass
pass
@@ -254,13 +275,83 @@
pass
pass
- if store_config:
+ if store_command:
logger.info("Storing applied config: " + pprint.pformat(command['configurations']))
- self.applied_configs = command['configurations']
+ self.stored_command = command
pass
- def allocate_port(self):
+ """
+ All unallocated ports should be set to 0
+ Look for "${SOME_COMPONENT_NAME.ALLOCATED_PORT}"
+ or "${component.ALLOCATED_PORT}{DEFAULT_port}"
+ or "${component.ALLOCATED_PORT}{DEFAULT_port}{DO_NOT_PROPAGATE}"
+ """
+
+ def set_all_unallocated_ports(self, value):
+ pattern_start = "${"
+ sub_section_start = "}{"
+ pattern_end = "}"
+ index = value.find(pattern_start)
+ while index != -1:
+ replace_index_start = index
+ replace_index_end = value.find(pattern_end, replace_index_start)
+ next_pattern_start = value.find(sub_section_start, replace_index_start)
+ while next_pattern_start == replace_index_end:
+ replace_index_end = value.find(pattern_end, replace_index_end + 1)
+ next_pattern_start = value.find(sub_section_start, next_pattern_start + 1)
+ pass
+
+ value = value[:replace_index_start] + "0" + value[replace_index_end + 1:]
+
+ # look for the next
+ index = value.find(pattern_start)
+
+ return value
+ pass
+
+ """
+ Port allocation can asks for multiple dynamic ports
+ port_req_pattern is of type ${component_name.ALLOCATED_PORT}
+ append {DEFAULT_ and find the default value
+ append {DO_NOT_PROPAGATE} if it exists
+ """
+ def allocate_ports(self, value, port_req_pattern):
+ default_port_pattern = "{DEFAULT_"
+ do_not_propagate_pattern = "{DO_NOT_PROPAGATE}"
+ index = value.find(port_req_pattern)
+ while index != -1:
+ replaced_pattern = port_req_pattern
+ def_port = None
+ if index == value.find(port_req_pattern + default_port_pattern):
+ replaced_pattern = port_req_pattern + default_port_pattern
+ start_index = index + len(replaced_pattern)
+ end_index = value.find("}", start_index)
+ def_port_str = value[start_index:end_index]
+ def_port = int(def_port_str)
+ # default value of 0 means allocate any dynamic port
+ if def_port == 0:
+ def_port = None
+
+ replaced_pattern = replaced_pattern + def_port_str + "}"
+ pass
+ if index == value.find(replaced_pattern + do_not_propagate_pattern):
+ replaced_pattern = replaced_pattern + do_not_propagate_pattern
+ pass
+ port = self.allocate_port(def_port)
+ value = value.replace(replaced_pattern, str(port), 1)
+ logger.info("Allocated port " + str(port) + " for " + replaced_pattern)
+ index = value.find(port_req_pattern)
+ pass
+ return value
+ pass
+
+
+ def allocate_port(self, default_port=None):
+ if default_port != None:
+ if self.is_port_available(default_port):
+ return default_port
+
MAX_ATTEMPT = 5
iter = 0
port = -1
@@ -278,4 +369,14 @@
logger.info("Allocated dynamic port: " + str(port))
return port
+ def is_port_available(self, port):
+ try:
+ sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ sock.settimeout(0.2)
+ sock.connect(('127.0.0.1', port))
+ sock.close()
+ except:
+ return True
+ return False
+
diff --git a/slider-agent/src/main/python/agent/Heartbeat.py b/slider-agent/src/main/python/agent/Heartbeat.py
index 8192348..b107d92 100644
--- a/slider-agent/src/main/python/agent/Heartbeat.py
+++ b/slider-agent/src/main/python/agent/Heartbeat.py
@@ -36,7 +36,8 @@
self.config = config
self.reports = []
- def build(self, commandResult, id='-1', componentsMapped=False):
+ def build(self, commandResult, id='-1',
+ componentsMapped=False):
timestamp = int(time.time() * 1000)
queueResult = self.actionQueue.result()
logger.info("Queue result: " + pformat(queueResult))
@@ -55,7 +56,15 @@
if not self.actionQueue.commandQueue.empty():
commandsInProgress = True
if len(queueResult) != 0:
- heartbeat['reports'] = queueResult['reports']
+ heartbeat['reports'] = []
+ for report in queueResult['reports']:
+ if report['reportResult']:
+ del report['reportResult']
+ heartbeat['reports'].append(report)
+ else:
+ # dropping the result but only recording the status
+ commandResult["commandStatus"] = report["status"]
+ pass
if len(heartbeat['reports']) > 0:
# There may be IN_PROGRESS tasks
commandsInProgress = True
diff --git a/slider-agent/src/main/python/agent/ProcessHelper.py b/slider-agent/src/main/python/agent/ProcessHelper.py
index b6283b0..467c4d8 100644
--- a/slider-agent/src/main/python/agent/ProcessHelper.py
+++ b/slider-agent/src/main/python/agent/ProcessHelper.py
@@ -22,12 +22,13 @@
import logging
import traceback
import sys
+import posixpath
from shell import getTempFiles
logger = logging.getLogger()
if 'AGENT_WORK_ROOT' in os.environ:
- pidfile = os.path.join(os.environ['AGENT_WORK_ROOT'], "infra/run/agent.pid")
+ pidfile = os.path.realpath(posixpath.join(os.environ['AGENT_WORK_ROOT'], "infra", "run", "agent.pid"))
else:
pidfile = None
diff --git a/slider-agent/src/main/python/agent/PythonExecutor.py b/slider-agent/src/main/python/agent/PythonExecutor.py
index 5f29e5e..54ce247 100644
--- a/slider-agent/src/main/python/agent/PythonExecutor.py
+++ b/slider-agent/src/main/python/agent/PythonExecutor.py
@@ -28,6 +28,7 @@
from Grep import Grep
import shell
import sys
+import platform
import Constants
@@ -125,6 +126,7 @@
Creates subprocess with given parameters. This functionality was moved to separate method
to make possible unit testing
"""
+ close_fds = None if platform.system() == "Windows" else True
env = os.environ.copy()
if environment_vars:
for k, v in environment_vars:
@@ -132,13 +134,14 @@
env[k] = v
return subprocess.Popen(command,
stdout=tmpout,
- stderr=tmperr, close_fds=True, env=env)
+ stderr=tmperr, close_fds=close_fds, env=env)
def isSuccessfull(self, returncode):
return not self.python_process_has_been_killed and returncode == 0
def python_command(self, script, script_params):
- python_binary = sys.executable
+    # we need to manually pass the python executable on Windows because sys.executable will return the service wrapper
+ python_binary = os.environ['PYTHON_EXE'] if 'PYTHON_EXE' in os.environ else sys.executable
python_command = [python_binary, "-S", script] + script_params
return python_command
diff --git a/slider-agent/src/main/python/agent/Register.py b/slider-agent/src/main/python/agent/Register.py
index 7c7ff06..b59154f 100644
--- a/slider-agent/src/main/python/agent/Register.py
+++ b/slider-agent/src/main/python/agent/Register.py
@@ -29,7 +29,7 @@
def __init__(self, config):
self.config = config
- def build(self, id='-1'):
+ def build(self, actualState, expectedState, allocated_ports, id='-1'):
timestamp = int(time.time() * 1000)
version = self.read_agent_version()
@@ -38,7 +38,10 @@
'timestamp': timestamp,
'hostname': self.config.getLabel(),
'publicHostname': hostname.public_hostname(),
- 'agentVersion': version
+ 'agentVersion': version,
+ 'actualState': actualState,
+ 'expectedState': expectedState,
+ 'allocatedPorts': allocated_ports
}
return register
diff --git a/slider-agent/src/main/python/agent/Registry.py b/slider-agent/src/main/python/agent/Registry.py
new file mode 100644
index 0000000..37736fe
--- /dev/null
+++ b/slider-agent/src/main/python/agent/Registry.py
@@ -0,0 +1,57 @@
+#!/usr/bin/env python
+'''
+
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+'''
+
+import json
+import logging
+from kazoo.client import KazooClient
+
+logger = logging.getLogger()
+
+class Registry:
+ def __init__(self, zk_quorum, zk_reg_path):
+ self.zk_quorum = zk_quorum
+ self.zk_reg_path = zk_reg_path
+
+ def readAMHostPort(self):
+ amHost = ""
+ amSecuredPort = ""
+ zk = None
+ try:
+ zk = KazooClient(hosts=self.zk_quorum, read_only=True)
+ zk.start()
+ data, stat = zk.get(self.zk_reg_path)
+ logger.debug("Registry Data: %s" % (data.decode("utf-8")))
+ sliderRegistry = json.loads(data)
+ amUrl = sliderRegistry["payload"]["internalView"]["endpoints"]["org.apache.slider.agents"]["address"]
+ amHost = amUrl.split("/")[2].split(":")[0]
+ amSecuredPort = amUrl.split(":")[2].split("/")[0]
+ # the port needs to be utf-8 encoded
+ amSecuredPort = amSecuredPort.encode('utf8', 'ignore')
+ except Exception:
+ # log and let empty strings be returned
+ logger.error("Could not connect to zk registry at %s in quorum %s" %
+ (self.zk_reg_path, self.zk_quorum))
+ pass
+ finally:
+ if not zk == None:
+ zk.stop()
+ zk.close()
+ logger.info("AM Host = %s, AM Secured Port = %s" % (amHost, amSecuredPort))
+ return amHost, amSecuredPort
diff --git a/slider-agent/src/main/python/agent/main.py b/slider-agent/src/main/python/agent/main.py
index 12e07ba..f68db04 100644
--- a/slider-agent/src/main/python/agent/main.py
+++ b/slider-agent/src/main/python/agent/main.py
@@ -26,13 +26,19 @@
import traceback
import os
import time
-import errno
+import platform
+import ConfigParser
import ProcessHelper
+import errno
+import posixpath
from Controller import Controller
from AgentConfig import AgentConfig
from NetUtil import NetUtil
+from Registry import Registry
+import Constants
logger = logging.getLogger()
+IS_WINDOWS = platform.system() == "Windows"
formatstr = "%(levelname)s %(asctime)s %(filename)s:%(lineno)d - %(message)s"
agentPid = os.getpid()
@@ -99,12 +105,13 @@
def bind_signal_handlers():
signal.signal(signal.SIGINT, signal_handler)
signal.signal(signal.SIGTERM, signal_handler)
- signal.signal(signal.SIGUSR1, debug)
+ if platform.system() != "Windows":
+ signal.signal(signal.SIGUSR1, debug)
def update_config_from_file(agentConfig):
try:
- configFile = os.path.join(agentConfig.getWorkRootPath(), configFileRelPath)
+ configFile = posixpath.join(agentConfig.getWorkRootPath(), configFileRelPath)
if os.path.exists(configFile):
agentConfig.setConfig(configFile)
else:
@@ -136,7 +143,7 @@
def ensure_path_exists(path):
try:
- os.makedirs(path)
+ os.makedirs(os.path.realpath(path))
except OSError as exception:
if exception.errno != errno.EEXIST:
raise
@@ -173,46 +180,58 @@
parser = OptionParser()
parser.add_option("-v", "--verbose", dest="verbose", help="verbose log output", default=False)
parser.add_option("-l", "--label", dest="label", help="label of the agent", default=None)
- parser.add_option("--host", dest="host", help="AppMaster host", default=None)
- parser.add_option("--port", dest="port", help="AppMaster port", default=None)
- parser.add_option("--secured_port", dest="secured_port", help="AppMaster 2 Way port", default=None)
+ parser.add_option("--zk-quorum", dest=Constants.ZK_QUORUM, help="Zookeeper Quorum", default=None)
+ parser.add_option("--zk-reg-path", dest=Constants.ZK_REG_PATH, help="Zookeeper Registry Path", default=None)
parser.add_option("--debug", dest="debug", help="Agent debug hint", default="")
(options, args) = parser.parse_args()
if not 'AGENT_WORK_ROOT' in os.environ:
- parser.error("AGENT_WORK_ROOT environment variable must be set.");
+ parser.error("AGENT_WORK_ROOT environment variable must be set.")
options.root_folder = os.environ['AGENT_WORK_ROOT']
if not 'AGENT_LOG_ROOT' in os.environ:
- parser.error("AGENT_LOG_ROOT environment variable must be set.");
+ parser.error("AGENT_LOG_ROOT environment variable must be set.")
options.log_folder = os.environ['AGENT_LOG_ROOT']
+ all_log_folders = [x.strip() for x in options.log_folder.split(',')]
+ if len(all_log_folders) > 1:
+ options.log_folder = all_log_folders[0]
+
+  # If there are multiple log folders, separated by commas, pick the first one
+
if not options.label:
parser.error("label is required.");
- bind_signal_handlers()
+ if not IS_WINDOWS:
+ bind_signal_handlers()
# Check for configuration file.
agentConfig = AgentConfig(options.root_folder, options.log_folder, options.label)
update_config_from_file(agentConfig)
# update configurations if needed
- if options.host:
- agentConfig.set(AgentConfig.SERVER_SECTION, "hostname", options.host)
+ if options.zk_quorum:
+ agentConfig.set(AgentConfig.SERVER_SECTION, Constants.ZK_QUORUM, options.zk_quorum)
- if options.port:
- agentConfig.set(AgentConfig.SERVER_SECTION, "port", options.port)
-
- if options.secured_port:
- agentConfig.set(AgentConfig.SERVER_SECTION, "secured_port", options.secured_port)
+ if options.zk_reg_path:
+ agentConfig.set(AgentConfig.SERVER_SECTION, Constants.ZK_REG_PATH, options.zk_reg_path)
if options.debug:
agentConfig.set(AgentConfig.AGENT_SECTION, AgentConfig.APP_DBG_CMD, options.debug)
+ # Extract the AM hostname and secured port from ZK registry
+ registry = Registry(options.zk_quorum, options.zk_reg_path)
+ amHost, amSecuredPort = registry.readAMHostPort()
+ if amHost:
+ agentConfig.set(AgentConfig.SERVER_SECTION, "hostname", amHost)
+
+ if amSecuredPort:
+ agentConfig.set(AgentConfig.SERVER_SECTION, "secured_port", amSecuredPort)
+
# set the security directory to a subdirectory of the run dir
- secDir = os.path.join(agentConfig.getResolvedPath(AgentConfig.RUN_DIR), "security")
+ secDir = posixpath.join(agentConfig.getResolvedPath(AgentConfig.RUN_DIR), "security")
logger.info("Security/Keys directory: " + secDir)
agentConfig.set(AgentConfig.SECURITY_SECTION, "keysdir", secDir)
- logFile = os.path.join(agentConfig.getResolvedPath(AgentConfig.LOG_DIR), logFileName)
+ logFile = posixpath.join(agentConfig.getResolvedPath(AgentConfig.LOG_DIR), logFileName)
perform_prestart_checks(agentConfig)
ensure_folder_layout(agentConfig)
@@ -226,9 +245,12 @@
logger.info("Using AGENT_WORK_ROOT = " + options.root_folder)
logger.info("Using AGENT_LOG_ROOT = " + options.log_folder)
+ if len(all_log_folders) > 1:
+ logger.info("Selected log folder from available: " + ",".join(all_log_folders))
+
server_url = SERVER_STATUS_URL.format(
agentConfig.get(AgentConfig.SERVER_SECTION, 'hostname'),
- agentConfig.get(AgentConfig.SERVER_SECTION, 'port'),
+ agentConfig.get(AgentConfig.SERVER_SECTION, 'secured_port'),
agentConfig.get(AgentConfig.SERVER_SECTION, 'check_path'))
print("Connecting to the server at " + server_url + "...")
logger.info('Connecting to the server at: ' + server_url)
diff --git a/slider-agent/src/main/python/agent/shell.py b/slider-agent/src/main/python/agent/shell.py
index d339764..446dde9 100644
--- a/slider-agent/src/main/python/agent/shell.py
+++ b/slider-agent/src/main/python/agent/shell.py
@@ -28,16 +28,19 @@
import time
import traceback
import pprint
+import platform
-try:
+if platform.system() != "Windows":
+ try:
import pwd
-except ImportError:
+ except ImportError:
import winpwd as pwd
global serverTracker
serverTracker = {}
logger = logging.getLogger()
+shellRunner = None
threadLocal = threading.local()
gracefull_kill_delay = 5 # seconds between SIGTERM and SIGKILL
tempFiles = []
@@ -47,7 +50,51 @@
def getTempFiles():
return tempFiles
-def kill_process_with_children(parent_pid):
+class _dict_to_object:
+ def __init__(self, entries):
+ self.__dict__.update(entries)
+ def __getitem__(self, item):
+ return self.__dict__[item]
+# windows specific code
+def _kill_process_with_children_windows(parent_pid):
+ shellRunner().run(["taskkill", "/T", "/PID", "{0}".format(parent_pid)])
+
+
+class shellRunnerWindows:
+ # Run any command
+ def run(self, script, user=None):
+ logger.warn("user argument ignored on windows")
+ code = 0
+ if not isinstance(script, list):
+ cmd = " "
+ cmd = cmd.join(script)
+ else:
+ cmd = script
+ p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE, shell=False)
+ out, err = p.communicate()
+ code = p.wait()
+ logger.debug("Exitcode for %s is %d" % (cmd, code))
+ return {'exitCode': code, 'output': out, 'error': err}
+
+ def runPowershell(self, file=None, script_block=None, args=[]):
+ logger.warn("user argument ignored on windows")
+ code = 0
+ cmd = None
+ if file:
+ cmd = ['powershell', '-WindowStyle', 'Hidden', '-File', file] + args
+ elif script_block:
+ cmd = ['powershell', '-WindowStyle', 'Hidden', '-Command', script_block] + args
+ p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE, shell=False)
+ out, err = p.communicate()
+ code = p.wait()
+ logger.debug("Exitcode for %s is %d" % (cmd, code))
+ return _dict_to_object({'exitCode': code, 'output': out, 'error': err})
+
+
+# linux specific code
+def _kill_process_with_children_linux(parent_pid):
def kill_tree_function(pid, signal):
'''
Kills process tree starting from a given pid.
@@ -57,15 +104,15 @@
# a given PID and then passes list of "kill -<SIGNAL> PID" commands to 'sh'
# shell.
CMD = """ps xf | awk -v PID=""" + str(pid) + \
- """ ' $1 == PID { P = $1; next } P && /_/ { P = P " " $1;""" + \
- """K=P } P && !/_/ { P="" } END { print "kill -""" \
- + str(signal) + """ "K }' | sh """
+ """ ' $1 == PID { P = $1; next } P && /_/ { P = P " " $1;""" + \
+ """K=P } P && !/_/ { P="" } END { print "kill -""" \
+ + str(signal) + """ "K }' | sh """
process = subprocess.Popen(CMD, stdout=subprocess.PIPE,
stderr=subprocess.PIPE, shell=True)
process.communicate()
- run_kill_function(kill_tree_function, parent_pid)
+ _run_kill_function(kill_tree_function, parent_pid)
-def run_kill_function(kill_function, pid):
+def _run_kill_function(kill_function, pid):
try:
kill_function(pid, signal.SIGTERM)
except Exception, e:
@@ -80,8 +127,43 @@
logger.error("Failed to send SIGKILL to PID %d. Process exited?" % (pid))
logger.error("Reported error: " + repr(e))
-def changeUid():
+def _changeUid():
try:
os.setuid(threadLocal.uid)
except Exception:
- logger.warn("can not switch user for running command.")
\ No newline at end of file
+ logger.warn("can not switch user for running command.")
+
+
+class shellRunnerLinux:
+ # Run any command
+ def run(self, script, user=None):
+ try:
+ if user != None:
+ user = pwd.getpwnam(user)[2]
+ else:
+ user = os.getuid()
+ threadLocal.uid = user
+ except Exception:
+ logger.warn("can not switch user for RUN_COMMAND.")
+ code = 0
+ cmd = " "
+ cmd = cmd.join(script)
+ p = subprocess.Popen(cmd, preexec_fn=_changeUid, stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE, shell=True, close_fds=True)
+ out, err = p.communicate()
+ code = p.wait()
+ logger.debug("Exitcode for %s is %d" % (cmd, code))
+ return {'exitCode': code, 'output': out, 'error': err}
+
+
+def kill_process_with_children(parent_pid):
+ if platform.system() == "Windows":
+ _kill_process_with_children_windows(parent_pid)
+ else:
+ _kill_process_with_children_linux(parent_pid)
+
+
+if platform.system() == "Windows":
+ shellRunner = shellRunnerWindows
+else:
+ shellRunner = shellRunnerLinux
\ No newline at end of file
diff --git a/slider-agent/src/main/python/kazoo/LICENSE b/slider-agent/src/main/python/kazoo/LICENSE
new file mode 100644
index 0000000..68c771a
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/LICENSE
@@ -0,0 +1,176 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
diff --git a/slider-agent/src/main/python/kazoo/__init__.py b/slider-agent/src/main/python/kazoo/__init__.py
new file mode 100644
index 0000000..a7bacf3
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/__init__.py
@@ -0,0 +1,2 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+#
diff --git a/slider-agent/src/main/python/kazoo/client.py b/slider-agent/src/main/python/kazoo/client.py
new file mode 100644
index 0000000..a315489
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/client.py
@@ -0,0 +1,1413 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+"""Kazoo Zookeeper Client"""
+import inspect
+import logging
+import os
+import re
+import warnings
+from collections import defaultdict, deque
+from functools import partial
+from os.path import split
+
+from kazoo.exceptions import (
+ AuthFailedError,
+ ConfigurationError,
+ ConnectionClosedError,
+ ConnectionLoss,
+ NoNodeError,
+ NodeExistsError,
+ SessionExpiredError,
+ WriterNotClosedException,
+)
+from kazoo.handlers.threading import SequentialThreadingHandler
+from kazoo.handlers.utils import capture_exceptions, wrap
+from kazoo.hosts import collect_hosts
+from kazoo.loggingsupport import BLATHER
+from kazoo.protocol.connection import ConnectionHandler
+from kazoo.protocol.paths import normpath
+from kazoo.protocol.paths import _prefix_root
+from kazoo.protocol.serialization import (
+ Auth,
+ CheckVersion,
+ CloseInstance,
+ Create,
+ Delete,
+ Exists,
+ GetChildren,
+ GetChildren2,
+ GetACL,
+ SetACL,
+ GetData,
+ SetData,
+ Sync,
+ Transaction
+)
+from kazoo.protocol.states import KazooState
+from kazoo.protocol.states import KeeperState
+from kazoo.retry import KazooRetry
+from kazoo.security import ACL
+from kazoo.security import OPEN_ACL_UNSAFE
+
+# convenience API
+from kazoo.recipe.barrier import Barrier
+from kazoo.recipe.barrier import DoubleBarrier
+from kazoo.recipe.counter import Counter
+from kazoo.recipe.election import Election
+from kazoo.recipe.lock import Lock
+from kazoo.recipe.lock import Semaphore
+from kazoo.recipe.partitioner import SetPartitioner
+from kazoo.recipe.party import Party
+from kazoo.recipe.party import ShallowParty
+from kazoo.recipe.queue import Queue
+from kazoo.recipe.queue import LockingQueue
+from kazoo.recipe.watchers import ChildrenWatch
+from kazoo.recipe.watchers import DataWatch
+
+try: # pragma: nocover
+ basestring
+except NameError: # pragma: nocover
+ basestring = str
+
+LOST_STATES = (KeeperState.EXPIRED_SESSION, KeeperState.AUTH_FAILED,
+ KeeperState.CLOSED)
+ENVI_VERSION = re.compile('[\w\s:.]*=([\d\.]*).*', re.DOTALL)
+log = logging.getLogger(__name__)
+
+
+_RETRY_COMPAT_DEFAULTS = dict(
+ max_retries=None,
+ retry_delay=0.1,
+ retry_backoff=2,
+ retry_jitter=0.8,
+ retry_max_delay=3600,
+)
+
+_RETRY_COMPAT_MAPPING = dict(
+ max_retries='max_tries',
+ retry_delay='delay',
+ retry_backoff='backoff',
+ retry_jitter='max_jitter',
+ retry_max_delay='max_delay',
+)
+
+
+class KazooClient(object):
+ """An Apache Zookeeper Python client supporting alternate callback
+ handlers and high-level functionality.
+
+ Watch functions registered with this class will not get session
+ events, unlike the default Zookeeper watches. They will also be
+ called with a single argument, a
+ :class:`~kazoo.protocol.states.WatchedEvent` instance.
+
+ """
+ def __init__(self, hosts='127.0.0.1:2181',
+ timeout=10.0, client_id=None, handler=None,
+ default_acl=None, auth_data=None, read_only=None,
+ randomize_hosts=True, connection_retry=None,
+ command_retry=None, logger=None, **kwargs):
+ """Create a :class:`KazooClient` instance. All time arguments
+ are in seconds.
+
+ :param hosts: Comma-separated list of hosts to connect to
+ (e.g. 127.0.0.1:2181,127.0.0.1:2182,[::1]:2183).
+ :param timeout: The longest to wait for a Zookeeper connection.
+ :param client_id: A Zookeeper client id, used when
+ re-establishing a prior session connection.
+ :param handler: An instance of a class implementing the
+ :class:`~kazoo.interfaces.IHandler` interface
+ for callback handling.
+ :param default_acl: A default ACL used on node creation.
+ :param auth_data:
+ A list of authentication credentials to use for the
+ connection. Should be a list of (scheme, credential)
+ tuples as :meth:`add_auth` takes.
+ :param read_only: Allow connections to read only servers.
+ :param randomize_hosts: By default randomize host selection.
+ :param connection_retry:
+ A :class:`kazoo.retry.KazooRetry` object to use for
+ retrying the connection to Zookeeper. Also can be a dict of
+ options which will be used for creating one.
+ :param command_retry:
+ A :class:`kazoo.retry.KazooRetry` object to use for
+ the :meth:`KazooClient.retry` method. Also can be a dict of
+ options which will be used for creating one.
+ :param logger: A custom logger to use instead of the module
+ global `log` instance.
+
+ Basic Example:
+
+ .. code-block:: python
+
+ zk = KazooClient()
+ zk.start()
+ children = zk.get_children('/')
+ zk.stop()
+
+ As a convenience all recipe classes are available as attributes
+ and get automatically bound to the client. For example::
+
+ zk = KazooClient()
+ zk.start()
+ lock = zk.Lock('/lock_path')
+
+ .. versionadded:: 0.6
+ The read_only option. Requires Zookeeper 3.4+
+
+ .. versionadded:: 0.6
+ The retry_max_delay option.
+
+ .. versionadded:: 0.6
+ The randomize_hosts option.
+
+ .. versionchanged:: 0.8
+ Removed the unused watcher argument (was second argument).
+
+ .. versionadded:: 1.2
+ The connection_retry, command_retry and logger options.
+
+ """
+ self.logger = logger or log
+
+ # Record the handler strategy used
+ self.handler = handler if handler else SequentialThreadingHandler()
+ if inspect.isclass(self.handler):
+ raise ConfigurationError("Handler must be an instance of a class, "
+ "not the class: %s" % self.handler)
+
+ self.auth_data = auth_data if auth_data else set([])
+ self.default_acl = default_acl
+ self.randomize_hosts = randomize_hosts
+ self.hosts = None
+ self.chroot = None
+ self.set_hosts(hosts)
+
+ # Curator like simplified state tracking, and listeners for
+ # state transitions
+ self._state = KeeperState.CLOSED
+ self.state = KazooState.LOST
+ self.state_listeners = set()
+
+ self._reset()
+ self.read_only = read_only
+
+ if client_id:
+ self._session_id = client_id[0]
+ self._session_passwd = client_id[1]
+ else:
+ self._reset_session()
+
+ # ZK uses milliseconds
+ self._session_timeout = int(timeout * 1000)
+
+ # We use events like twitter's client to track current and
+ # desired state (connected, and whether to shutdown)
+ self._live = self.handler.event_object()
+ self._writer_stopped = self.handler.event_object()
+ self._stopped = self.handler.event_object()
+ self._stopped.set()
+ self._writer_stopped.set()
+
+ self.retry = self._conn_retry = None
+
+ if type(connection_retry) is dict:
+ self._conn_retry = KazooRetry(**connection_retry)
+ elif type(connection_retry) is KazooRetry:
+ self._conn_retry = connection_retry
+
+ if type(command_retry) is dict:
+ self.retry = KazooRetry(**command_retry)
+ elif type(command_retry) is KazooRetry:
+ self.retry = command_retry
+
+
+ if type(self._conn_retry) is KazooRetry:
+ if self.handler.sleep_func != self._conn_retry.sleep_func:
+ raise ConfigurationError("Retry handler and event handler "
+ " must use the same sleep func")
+
+ if type(self.retry) is KazooRetry:
+ if self.handler.sleep_func != self.retry.sleep_func:
+ raise ConfigurationError("Command retry handler and event "
+ "handler must use the same sleep func")
+
+ if self.retry is None or self._conn_retry is None:
+ old_retry_keys = dict(_RETRY_COMPAT_DEFAULTS)
+ for key in old_retry_keys:
+ try:
+ old_retry_keys[key] = kwargs.pop(key)
+ warnings.warn('Passing retry configuration param %s to the'
+ ' client directly is deprecated, please pass a'
+ ' configured retry object (using param %s)' % (
+ key, _RETRY_COMPAT_MAPPING[key]),
+ DeprecationWarning, stacklevel=2)
+ except KeyError:
+ pass
+
+ retry_keys = {}
+ for oldname, value in old_retry_keys.items():
+ retry_keys[_RETRY_COMPAT_MAPPING[oldname]] = value
+
+ if self._conn_retry is None:
+ self._conn_retry = KazooRetry(
+ sleep_func=self.handler.sleep_func,
+ **retry_keys)
+ if self.retry is None:
+ self.retry = KazooRetry(
+ sleep_func=self.handler.sleep_func,
+ **retry_keys)
+
+ self._conn_retry.interrupt = lambda: self._stopped.is_set()
+ self._connection = ConnectionHandler(self, self._conn_retry.copy(),
+ logger=self.logger)
+
+ # Every retry call should have its own copy of the retry helper
+ # to avoid shared retry counts
+ self._retry = self.retry
+ def _retry(*args, **kwargs):
+ return self._retry.copy()(*args, **kwargs)
+ self.retry = _retry
+
+ self.Barrier = partial(Barrier, self)
+ self.Counter = partial(Counter, self)
+ self.DoubleBarrier = partial(DoubleBarrier, self)
+ self.ChildrenWatch = partial(ChildrenWatch, self)
+ self.DataWatch = partial(DataWatch, self)
+ self.Election = partial(Election, self)
+ self.Lock = partial(Lock, self)
+ self.Party = partial(Party, self)
+ self.Queue = partial(Queue, self)
+ self.LockingQueue = partial(LockingQueue, self)
+ self.SetPartitioner = partial(SetPartitioner, self)
+ self.Semaphore = partial(Semaphore, self)
+ self.ShallowParty = partial(ShallowParty, self)
+
+ # If we got any unhandled keywords, complain like python would
+ if kwargs:
+ raise TypeError('__init__() got unexpected keyword arguments: %s'
+ % (kwargs.keys(),))
+
def _reset(self):
    """Resets a variety of client states for a new connection."""
    # Fresh request queues: _queue holds requests not yet written to
    # the wire, _pending holds requests awaiting a server response.
    self._queue = deque()
    self._pending = deque()

    self._reset_watchers()
    self._reset_session()
    # zxid of the last transaction seen; 0 means "none seen yet".
    self.last_zxid = 0
    self._protocol_version = None

def _reset_watchers(self):
    # Watcher callbacks keyed by path; sets allow multiple distinct
    # watchers per path without duplicates.
    self._child_watchers = defaultdict(set)
    self._data_watchers = defaultdict(set)

def _reset_session(self):
    # Clear session credentials; a 16-byte zero password is the
    # sentinel sent on an initial (sessionless) connect.
    self._session_id = None
    self._session_passwd = b'\x00' * 16

@property
def client_state(self):
    """Returns the last Zookeeper client state

    This is the non-simplified state information and is generally
    not as useful as the simplified KazooState information.

    """
    return self._state

@property
def client_id(self):
    """Returns the client id for this Zookeeper session if
    connected.

    :returns: client id which consists of the session id and
        password, or None when there is no live connection.
    :rtype: tuple
    """
    # Only meaningful while the connection is live; a stale
    # id/password pair would be misleading otherwise.
    if self._live.is_set():
        return (self._session_id, self._session_passwd)
    return None

@property
def connected(self):
    """Returns whether the Zookeeper connection has been
    established."""
    return self._live.is_set()
+
def set_hosts(self, hosts, randomize_hosts=None):
    """Replace the host list used by this client.

    Accepts the same hosts format as ``__init__`` and makes the
    client use the new hosts the next time it needs to look up a
    set of hosts. The currently connected status is unaffected.

    Changing the chroot is not supported; passing a host string with
    a different chroot raises :exc:`ConfigurationError`.

    :param hosts: see description in :meth:`KazooClient.__init__`
    :param randomize_hosts: override client default for host
        randomization
    :raises:
        :exc:`ConfigurationError` if the hosts argument changes the
        chroot

    .. versionadded:: 1.4

    .. warning::

        Pointing a client at a completely disparate zookeeper
        cluster with this function has undefined behavior.

    """
    randomize = (self.randomize_hosts if randomize_hosts is None
                 else randomize_hosts)

    self.hosts, chroot = collect_hosts(hosts, randomize)

    new_chroot = normpath(chroot) if chroot else ''

    if self.chroot is not None and new_chroot != self.chroot:
        raise ConfigurationError("Changing chroot at runtime is not "
                                 "currently supported")

    self.chroot = new_chroot
+
def add_listener(self, listener):
    """Register a callback for connection state changes.

    The callback receives a
    :class:`~kazoo.protocol.states.KazooState` instance whenever the
    connection state transitions.

    .. warning::

        The callback must not block. If it may need data or a value
        that could block, use
        :meth:`~kazoo.interfaces.IHandler.spawn` so the listener can
        return immediately.

    """
    if not listener or not callable(listener):
        raise ConfigurationError("listener must be callable")
    self.state_listeners.add(listener)

def remove_listener(self, listener):
    """Unregister a previously added listener; a no-op if absent."""
    self.state_listeners.discard(listener)
+
def _make_state_change(self, state):
    """Transition the simplified KazooState and notify listeners."""
    # skip if state is current
    if self.state == state:
        return

    self.state = state

    # Create copy of listeners for iteration in case one needs to
    # remove itself
    for listener in list(self.state_listeners):
        try:
            remove = listener(state)
            # A listener may request its own removal by returning True.
            if remove is True:
                self.remove_listener(listener)
        except Exception:
            # A failing listener must not break state propagation to
            # the remaining listeners.
            self.logger.exception("Error in connection state listener")

def _session_callback(self, state):
    """Handle a raw KeeperState change reported by the connection."""
    if state == self._state:
        return

    # Note that we don't check self.state == LOST since that's also
    # the client's initial state
    dead_state = self._state in LOST_STATES
    self._state = state

    # If we were previously closed or had an expired session, and
    # are now connecting, don't bother with the rest of the
    # transitions since they only apply after
    # we've established a connection
    if dead_state and state == KeeperState.CONNECTING:
        self.logger.log(BLATHER, "Skipping state change")
        return

    if state in (KeeperState.CONNECTED, KeeperState.CONNECTED_RO):
        self.logger.info("Zookeeper connection established, state: %s", state)
        self._live.set()
        self._make_state_change(KazooState.CONNECTED)
    elif state in LOST_STATES:
        self.logger.info("Zookeeper session lost, state: %s", state)
        self._live.clear()
        self._make_state_change(KazooState.LOST)
        # Fail in-flight requests, then reset queues/watchers/session.
        self._notify_pending(state)
        self._reset()
    else:
        self.logger.info("Zookeeper connection lost")
        # Connection lost
        self._live.clear()
        self._notify_pending(state)
        self._make_state_change(KazooState.SUSPENDED)
        self._reset_watchers()
+
def _notify_pending(self, state):
    """Used to clear a pending response queue and request queue
    during connection drops."""
    # Choose the exception matching the reason for the drop.
    if state == KeeperState.AUTH_FAILED:
        exc = AuthFailedError()
    elif state == KeeperState.EXPIRED_SESSION:
        exc = SessionExpiredError()
    else:
        exc = ConnectionLoss()

    # Drain both queues, failing each waiting async result.
    # NOTE(review): popleft is kept inside the try rather than using a
    # `while queue:` loop — presumably to tolerate a concurrent
    # consumer; do not simplify without confirming.
    while True:
        try:
            request, async_object, xid = self._pending.popleft()
            if async_object:
                async_object.set_exception(exc)
        except IndexError:
            break

    while True:
        try:
            request, async_object = self._queue.popleft()
            if async_object:
                async_object.set_exception(exc)
        except IndexError:
            break

def _safe_close(self):
    """Stop the handler and wait for the connection writer to exit."""
    self.handler.stop()
    # Give the writer the session timeout (in seconds), but at least
    # 10 seconds, to shut down cleanly.
    timeout = self._session_timeout // 1000
    if timeout < 10:
        timeout = 10
    if not self._connection.stop(timeout):
        raise WriterNotClosedException(
            "Writer still open from prior connection "
            "and wouldn't close after %s seconds" % timeout)
+
def _call(self, request, async_object):
    """Ensure there's an active connection and put the request in
    the queue if there is.

    :param request: serializable request object to enqueue.
    :param async_object: async result that receives the response or
        the failure exception.

    Returns False if the call short circuits due to AUTH_FAILED,
    CLOSED, EXPIRED_SESSION or CONNECTING state, True once the
    request has been queued and the writer woken.

    """
    if self._state == KeeperState.AUTH_FAILED:
        async_object.set_exception(AuthFailedError())
        return False
    elif self._state == KeeperState.CLOSED:
        async_object.set_exception(ConnectionClosedError(
            "Connection has been closed"))
        return False
    elif self._state in (KeeperState.EXPIRED_SESSION,
                         KeeperState.CONNECTING):
        async_object.set_exception(SessionExpiredError())
        return False

    self._queue.append((request, async_object))

    # wake the connection, guarding against a race with close()
    write_pipe = self._connection._write_pipe
    if write_pipe is None:
        async_object.set_exception(ConnectionClosedError(
            "Connection has been closed"))
        # BUG FIX: previously control fell through to
        # os.write(None, ...) and relied on a bare except to swallow
        # the TypeError, setting the exception a second time.
        return False
    try:
        os.write(write_pipe, b'\0')
    except OSError:
        # Pipe torn down between the None check and the write.
        async_object.set_exception(ConnectionClosedError(
            "Connection has been closed"))
        return False
    return True
+
def start(self, timeout=15):
    """Initiate connection to ZK.

    :param timeout: Time in seconds to wait for connection to
        succeed.
    :raises: :attr:`~kazoo.interfaces.IHandler.timeout_exception`
        if the connection wasn't established within `timeout`
        seconds.

    """
    event = self.start_async()
    event.wait(timeout=timeout)
    if not self.connected:
        # Timed out: ensure we are fully disconnected before raising.
        self.stop()
        raise self.handler.timeout_exception("Connection time-out")

    # Connected under a chroot that doesn't exist yet: warn, since
    # most operations would fail with NoNodeError.
    if self.chroot and not self.exists("/"):
        warnings.warn("No chroot path exists, the chroot path "
                      "should be created before normal use.")

def start_async(self):
    """Asynchronously initiate connection to ZK.

    :returns: An event object that can be checked to see if the
        connection is alive.
    :rtype: :class:`~threading.Event` compatible object.

    """
    # If we're already connected, ignore
    if self._live.is_set():
        return self._live

    # Make sure we're safely closed
    self._safe_close()

    # We've been asked to connect, clear the stop and our writer
    # thread indicator
    self._stopped.clear()
    self._writer_stopped.clear()

    # Order matters below: the handler must be running before the
    # connection thread is started.

    # Start the handler
    self.handler.start()

    # Start the connection
    self._connection.start()
    return self._live
+
def stop(self):
    """Gracefully stop this Zookeeper session.

    This method can be called while a reconnection attempt is in
    progress, which will then be halted.

    Once the connection is closed, its session becomes invalid. All
    the ephemeral nodes in the ZooKeeper server associated with the
    session will be removed. The watches left on those nodes (and
    on their parents) will be triggered.

    """
    if self._stopped.is_set():
        return

    self._stopped.set()
    self._queue.append((CloseInstance, None))
    # Wake the writer so it sees the close request. BUG FIX: guard
    # against a race where the connection already tore down its
    # write pipe (the same race _call defends against).
    write_pipe = self._connection._write_pipe
    if write_pipe is not None:
        try:
            os.write(write_pipe, b'\0')
        except OSError:
            # Pipe already closed: the writer is gone, nothing to wake.
            pass
    self._safe_close()
+
def restart(self):
    """Stop, then start the Zookeeper session again."""
    # A full stop/start cycle; establishes a brand-new session.
    self.stop()
    self.start()

def close(self):
    """Release resources held by the client.

    Call this on a stopped client before discarding it; otherwise
    filehandles may be leaked.

    .. versionadded:: 1.0
    """
    self._connection.close()
+
def command(self, cmd=b'ruok'):
    """Send a management command to the current ZK server.

    Examples are `ruok`, `envi` or `stat`.

    :param cmd: four-letter command as bytes.
    :returns: An unstructured textual response.
    :rtype: str

    :raises:
        :exc:`ConnectionLoss` if there is no connection open, or
        possibly a :exc:`socket.error` if there's a problem with
        the connection used just for this command.

    .. versionadded:: 0.5

    """
    if not self._live.is_set():
        raise ConnectionLoss("No connection to server")

    # Commands use a dedicated one-shot socket to the same peer as
    # the main connection.
    peer = self._connection._socket.getpeername()
    sock = self.handler.create_connection(
        peer, timeout=self._session_timeout / 1000.0)
    # BUG FIX: close the socket even when sendall/recv raises;
    # previously an error here leaked the file descriptor.
    try:
        sock.sendall(cmd)
        result = sock.recv(8192)
    finally:
        sock.close()
    return result.decode('utf-8', 'replace')

def server_version(self):
    """Get the version of the currently connected ZK server.

    :returns: The server version, for example (3, 4, 3).
    :rtype: tuple

    .. versionadded:: 0.5

    """
    # Parse the version out of the `envi` command output.
    data = self.command(b'envi')
    string = ENVI_VERSION.match(data).group(1)
    return tuple([int(i) for i in string.split('.')])
+
def add_auth(self, scheme, credential):
    """Send credentials to server, blocking for the outcome.

    :param scheme: authentication scheme (default supported:
        "digest").
    :param credential: the credential -- value depends on scheme.
    :returns: True if it was successful.
    :rtype: bool
    :raises:
        :exc:`~kazoo.exceptions.AuthFailedError` on failure; the
        session state is set to AUTH_FAILED as well.

    """
    result = self.add_auth_async(scheme, credential)
    return result.get()

def add_auth_async(self, scheme, credential):
    """Asynchronously send credentials to server. Takes the same
    arguments as :meth:`add_auth`.

    :rtype: :class:`~kazoo.interfaces.IAsyncResult`

    """
    if not isinstance(scheme, basestring):
        raise TypeError("Invalid type for scheme")
    if not isinstance(credential, basestring):
        raise TypeError("Invalid type for credential")

    # Remember the credentials so they can be replayed after a
    # reconnect.
    self.auth_data.add((scheme, credential))

    result = self.handler.async_result()
    self._call(Auth(0, scheme, credential), result)
    return result

def unchroot(self, path):
    """Strip the chroot prefix from ``path``, when applicable."""
    chroot = self.chroot
    if chroot and path.startswith(chroot):
        return path[len(chroot):]
    return path
+
def sync_async(self, path):
    """Asynchronous sync.

    :rtype: :class:`~kazoo.interfaces.IAsyncResult`

    """
    result = self.handler.async_result()
    self._call(Sync(_prefix_root(self.chroot, path)), result)
    return result

def sync(self, path):
    """Sync, blocking until the response is acknowledged.

    Flushes the channel between process and leader.

    :param path: path of node.
    :returns: The node path that was synced.
    :raises:
        :exc:`~kazoo.exceptions.ZookeeperError` if the server
        returns a non-zero error code.

    .. versionadded:: 0.5

    """
    result = self.sync_async(path)
    return result.get()
+
def create(self, path, value=b"", acl=None, ephemeral=False,
           sequence=False, makepath=False):
    """Create a node carrying ``value``, optionally with an ACL.

    ``ephemeral`` and ``sequence`` determine the node type: an
    ephemeral node is removed automatically when the creating
    session expires; a sequential node gets the given path plus a
    zero-padded, 10-digit monotonically increasing suffix.

    Creating an already-existing path raises NodeExistsError (never
    for sequential nodes, whose actual paths are unique). A missing
    parent raises NoNodeError unless ``makepath`` is True, which
    creates all missing parents. An ephemeral parent raises
    NoChildrenForEphemeralsError. Success triggers watches left by
    :meth:`exists`/:meth:`get` on the node and by
    :meth:`get_children` on the parent. Values above 1 MB raise
    ZookeeperError.

    :param path: Path of node.
    :param value: Initial bytes value of node.
    :param acl: :class:`~kazoo.security.ACL` list.
    :param ephemeral: Boolean indicating whether node is ephemeral
        (tied to this session).
    :param sequence: Boolean indicating whether path is suffixed
        with a unique index.
    :param makepath: Whether the path should be created if it
        doesn't exist.
    :returns: Real path of the new node.
    :rtype: str

    :raises:
        :exc:`~kazoo.exceptions.NodeExistsError` if the node
        already exists.

        :exc:`~kazoo.exceptions.NoNodeError` if parent nodes are
        missing.

        :exc:`~kazoo.exceptions.NoChildrenForEphemeralsError` if
        the parent node is an ephemeral node.

        :exc:`~kazoo.exceptions.ZookeeperError` if the provided
        value is too large or the server returns a non-zero error
        code.

    """
    # Falsy acl (None or empty) falls back to the client default.
    effective_acl = acl or self.default_acl
    result = self.create_async(
        path, value, acl=effective_acl, ephemeral=ephemeral,
        sequence=sequence, makepath=makepath)
    return result.get()
+
def create_async(self, path, value=b"", acl=None, ephemeral=False,
                 sequence=False, makepath=False):
    """Asynchronously create a ZNode. Takes the same arguments as
    :meth:`create`.

    :rtype: :class:`~kazoo.interfaces.IAsyncResult`

    .. versionadded:: 1.1
        The makepath option.

    """
    if acl is None and self.default_acl:
        acl = self.default_acl

    # Validate argument types up front so bad input surfaces as a
    # TypeError in the caller, not inside a callback.
    if not isinstance(path, basestring):
        raise TypeError("path must be a string")
    if acl and (isinstance(acl, ACL) or
                not isinstance(acl, (tuple, list))):
        raise TypeError("acl must be a tuple/list of ACL's")
    if value is not None and not isinstance(value, bytes):
        raise TypeError("value must be a byte string")
    if not isinstance(ephemeral, bool):
        raise TypeError("ephemeral must be a bool")
    if not isinstance(sequence, bool):
        raise TypeError("sequence must be a bool")
    if not isinstance(makepath, bool):
        raise TypeError("makepath must be a bool")

    # Create flags: bit 0 = ephemeral, bit 1 = sequence.
    flags = 0
    if ephemeral:
        flags |= 1
    if sequence:
        flags |= 2
    if acl is None:
        acl = OPEN_ACL_UNSAFE

    async_result = self.handler.async_result()

    # Callback chain: do_create -> create_completion. On NoNodeError
    # with makepath=True: ensure_path_async(parent) ->
    # retry_completion -> do_create again.
    @capture_exceptions(async_result)
    def do_create():
        result = self._create_async_inner(path, value, acl, flags, trailing=sequence)
        result.rawlink(create_completion)

    @capture_exceptions(async_result)
    def retry_completion(result):
        # Propagate any ensure_path failure, then retry the create.
        result.get()
        do_create()

    @wrap(async_result)
    def create_completion(result):
        try:
            return self.unchroot(result.get())
        except NoNodeError:
            if not makepath:
                raise
            # A sequence path ending in '/' names its own parent
            # (minus the trailing slash); otherwise split normally.
            if sequence and path.endswith('/'):
                parent = path.rstrip('/')
            else:
                parent, _ = split(path)
            self.ensure_path_async(parent, acl).rawlink(retry_completion)

    do_create()
    return async_result
+
def _create_async_inner(self, path, value, acl, flags, trailing=False):
    """Issue the low-level Create request and return its async result.

    ``trailing`` controls whether a trailing slash is kept when
    prefixing the chroot (used for sequence nodes).
    """
    async_result = self.handler.async_result()
    call_result = self._call(
        Create(_prefix_root(self.chroot, path, trailing=trailing),
               value, acl, flags), async_result)
    if call_result is False:
        # We hit a short-circuit exit on the _call. Because we are
        # not using the original async_result here, we bubble the
        # exception upwards to the do_create function in
        # KazooClient.create so that it gets set on the correct
        # async_result object
        raise async_result.exception
    return async_result
+
def ensure_path(self, path, acl=None):
    """Recursively create a path if it doesn't exist.

    :param path: Path of node.
    :param acl: Permissions for node.

    """
    return self.ensure_path_async(path, acl).get()

def ensure_path_async(self, path, acl=None):
    """Recursively create a path asynchronously if it doesn't
    exist. Takes the same arguments as :meth:`ensure_path`.

    :rtype: :class:`~kazoo.interfaces.IAsyncResult`

    .. versionadded:: 1.1

    """
    acl = acl or self.default_acl
    async_result = self.handler.async_result()

    # Callback chain: exists_completion checks the node; when it is
    # missing, the parent is ensured first (prepare_completion), and
    # only then is this node created (create_completion).
    @wrap(async_result)
    def create_completion(result):
        try:
            return result.get()
        except NodeExistsError:
            # Concurrent creation by someone else still counts as
            # success for ensure_path semantics.
            return True

    @capture_exceptions(async_result)
    def prepare_completion(next_path, result):
        result.get()
        self.create_async(next_path, acl=acl).rawlink(create_completion)

    @wrap(async_result)
    def exists_completion(path, result):
        if result.get():
            return True
        parent, node = split(path)
        if node:
            self.ensure_path_async(parent, acl=acl).rawlink(
                partial(prepare_completion, path))
        else:
            # Reached the root of the path: just try to create it.
            self.create_async(path, acl=acl).rawlink(create_completion)

    self.exists_async(path).rawlink(partial(exists_completion, path))

    return async_result
+
def exists(self, path, watch=None):
    """Check if a node exists.

    A provided watch is left on the node at ``path`` and triggers on
    a successful create/delete of the node or a data change.

    :param path: Path of node.
    :param watch: Optional watch callback to set for future changes
        to this path.
    :returns: ZnodeStat of the node if it exists, else None if the
        node does not exist.
    :rtype: :class:`~kazoo.protocol.states.ZnodeStat` or `None`.

    :raises:
        :exc:`~kazoo.exceptions.ZookeeperError` if the server
        returns a non-zero error code.

    """
    result = self.exists_async(path, watch)
    return result.get()

def exists_async(self, path, watch=None):
    """Asynchronously check if a node exists. Takes the same
    arguments as :meth:`exists`.

    :rtype: :class:`~kazoo.interfaces.IAsyncResult`

    """
    if not isinstance(path, basestring):
        raise TypeError("path must be a string")
    if watch and not callable(watch):
        raise TypeError("watch must be a callable")

    result = self.handler.async_result()
    request = Exists(_prefix_root(self.chroot, path), watch)
    self._call(request, result)
    return result

def get(self, path, watch=None):
    """Get the value of a node.

    A provided watch is left on the node at ``path`` and triggers on
    a successful set-data or delete of the node.

    :param path: Path of node.
    :param watch: Optional watch callback to set for future changes
        to this path.
    :returns:
        Tuple (value, :class:`~kazoo.protocol.states.ZnodeStat`) of
        node.
    :rtype: tuple

    :raises:
        :exc:`~kazoo.exceptions.NoNodeError` if the node doesn't
        exist

        :exc:`~kazoo.exceptions.ZookeeperError` if the server
        returns a non-zero error code

    """
    result = self.get_async(path, watch)
    return result.get()

def get_async(self, path, watch=None):
    """Asynchronously get the value of a node. Takes the same
    arguments as :meth:`get`.

    :rtype: :class:`~kazoo.interfaces.IAsyncResult`

    """
    if not isinstance(path, basestring):
        raise TypeError("path must be a string")
    if watch and not callable(watch):
        raise TypeError("watch must be a callable")

    result = self.handler.async_result()
    request = GetData(_prefix_root(self.chroot, path), watch)
    self._call(request, result)
    return result
+
def get_children(self, path, watch=None, include_data=False):
    """Get a list of child nodes of a path.

    A provided watch is left on the node at ``path`` and triggers on
    a successful delete of the node or create/delete of a child.

    No ordering guarantee — natural or lexical — is made for the
    returned children.

    :param path: Path of node to list.
    :param watch: Optional watch callback to set for future changes
        to this path.
    :param include_data:
        Include the :class:`~kazoo.protocol.states.ZnodeStat` of
        the node in addition to the children. This option changes
        the return value to be a tuple of (children, stat).

    :returns: List of child node names, or tuple if `include_data`
        is `True`.
    :rtype: list

    :raises:
        :exc:`~kazoo.exceptions.NoNodeError` if the node doesn't
        exist.

        :exc:`~kazoo.exceptions.ZookeeperError` if the server
        returns a non-zero error code.

    .. versionadded:: 0.5
        The `include_data` option.

    """
    future = self.get_children_async(path, watch, include_data)
    return future.get()

def get_children_async(self, path, watch=None, include_data=False):
    """Asynchronously get a list of child nodes of a path. Takes
    the same arguments as :meth:`get_children`.

    :rtype: :class:`~kazoo.interfaces.IAsyncResult`

    """
    if not isinstance(path, basestring):
        raise TypeError("path must be a string")
    if watch and not callable(watch):
        raise TypeError("watch must be a callable")
    if not isinstance(include_data, bool):
        raise TypeError("include_data must be a bool")

    future = self.handler.async_result()
    # GetChildren2 additionally returns the node's stat.
    request_cls = GetChildren2 if include_data else GetChildren
    self._call(request_cls(_prefix_root(self.chroot, path), watch), future)
    return future
+
def get_acls(self, path):
    """Return the ACL and stat of the node of the given path.

    :param path: Path of the node.
    :returns: The ACL array of the given node and its
        :class:`~kazoo.protocol.states.ZnodeStat`.
    :rtype: tuple of (:class:`~kazoo.security.ACL` list,
        :class:`~kazoo.protocol.states.ZnodeStat`)
    :raises:
        :exc:`~kazoo.exceptions.NoNodeError` if the node doesn't
        exist.

        :exc:`~kazoo.exceptions.ZookeeperError` if the server
        returns a non-zero error code

    .. versionadded:: 0.5

    """
    future = self.get_acls_async(path)
    return future.get()

def get_acls_async(self, path):
    """Return the ACL and stat of the node of the given path. Takes
    the same arguments as :meth:`get_acls`.

    :rtype: :class:`~kazoo.interfaces.IAsyncResult`

    """
    if not isinstance(path, basestring):
        raise TypeError("path must be a string")

    future = self.handler.async_result()
    self._call(GetACL(_prefix_root(self.chroot, path)), future)
    return future

def set_acls(self, path, acls, version=-1):
    """Set the ACL for the node of the given path, provided the node
    exists and ``version`` matches its current version.

    :param path: Path for the node.
    :param acls: List of :class:`~kazoo.security.ACL` objects to
        set.
    :param version: The expected node version that must match.
    :returns: The stat of the node.
    :raises:
        :exc:`~kazoo.exceptions.BadVersionError` if version doesn't
        match.

        :exc:`~kazoo.exceptions.NoNodeError` if the node doesn't
        exist.

        :exc:`~kazoo.exceptions.InvalidACLError` if the ACL is
        invalid.

        :exc:`~kazoo.exceptions.ZookeeperError` if the server
        returns a non-zero error code.

    .. versionadded:: 0.5

    """
    future = self.set_acls_async(path, acls, version)
    return future.get()

def set_acls_async(self, path, acls, version=-1):
    """Set the ACL for the node of the given path. Takes the same
    arguments as :meth:`set_acls`.

    :rtype: :class:`~kazoo.interfaces.IAsyncResult`

    """
    if not isinstance(path, basestring):
        raise TypeError("path must be a string")
    # A bare ACL instance is rejected — a sequence of ACLs is required.
    if isinstance(acls, ACL) or not isinstance(acls, (tuple, list)):
        raise TypeError("acl must be a tuple/list of ACL's")
    if not isinstance(version, int):
        raise TypeError("version must be an int")

    future = self.handler.async_result()
    self._call(SetACL(_prefix_root(self.chroot, path), acls, version),
               future)
    return future
+
def set(self, path, value, version=-1):
    """Set the value of a node.

    If the node's version is newer than the supplied ``version``
    (and the supplied version is not -1), BadVersionError is raised.
    A successful set triggers all watches left on the node by
    :meth:`get` calls. Values above 1 MB raise ZookeeperError.

    :param path: Path of node.
    :param value: New data value.
    :param version: Version of node being updated, or -1.
    :returns: Updated :class:`~kazoo.protocol.states.ZnodeStat` of
        the node.

    :raises:
        :exc:`~kazoo.exceptions.BadVersionError` if version doesn't
        match.

        :exc:`~kazoo.exceptions.NoNodeError` if the node doesn't
        exist.

        :exc:`~kazoo.exceptions.ZookeeperError` if the provided
        value is too large or the server returns a non-zero error
        code.

    """
    future = self.set_async(path, value, version)
    return future.get()

def set_async(self, path, value, version=-1):
    """Set the value of a node. Takes the same arguments as
    :meth:`set`.

    :rtype: :class:`~kazoo.interfaces.IAsyncResult`

    """
    if not isinstance(path, basestring):
        raise TypeError("path must be a string")
    if value is not None and not isinstance(value, bytes):
        raise TypeError("value must be a byte string")
    if not isinstance(version, int):
        raise TypeError("version must be an int")

    future = self.handler.async_result()
    self._call(SetData(_prefix_root(self.chroot, path), value, version),
               future)
    return future
+
def transaction(self):
    """Create and return a :class:`TransactionRequest` object

    Creates a :class:`TransactionRequest` object. A Transaction can
    consist of multiple operations which can be committed as a
    single atomic unit. Either all of the operations will succeed
    or none of them.

    :returns: A TransactionRequest.
    :rtype: :class:`TransactionRequest`

    .. versionadded:: 0.6
        Requires Zookeeper 3.4+

    """
    # Each call hands out a fresh, single-use builder bound to this
    # client.
    return TransactionRequest(self)
+
def delete(self, path, version=-1, recursive=False):
    """Delete a node.

    Succeeds when the node exists and ``version`` matches the
    node's version (-1, the default, matches any version). A
    successful delete triggers the watches left on the node by
    `exists` calls and on the parent by `get_children` calls.

    :param path: Path of node to delete.
    :param version: Version of node to delete, or -1 for any. Note
        that the recursive path does not consult this value.
    :param recursive: Recursively delete node and all its children,
        defaults to False.
    :type recursive: bool

    :raises:
        :exc:`~kazoo.exceptions.BadVersionError` if version doesn't
        match.

        :exc:`~kazoo.exceptions.NoNodeError` if the node doesn't
        exist.

        :exc:`~kazoo.exceptions.NotEmptyError` if the node has
        children.

        :exc:`~kazoo.exceptions.ZookeeperError` if the server
        returns a non-zero error code.

    """
    if not isinstance(recursive, bool):
        raise TypeError("recursive must be a bool")

    if recursive:
        return self._delete_recursive(path)
    return self.delete_async(path, version).get()

def delete_async(self, path, version=-1):
    """Asynchronously delete a node. Takes the same arguments as
    :meth:`delete`, with the exception of `recursive`.

    :rtype: :class:`~kazoo.interfaces.IAsyncResult`

    """
    if not isinstance(path, basestring):
        raise TypeError("path must be a string")
    if not isinstance(version, int):
        raise TypeError("version must be an int")
    future = self.handler.async_result()
    self._call(Delete(_prefix_root(self.chroot, path), version),
               future)
    return future
+
+ def _delete_recursive(self, path):
+ try:
+ children = self.get_children(path)
+ except NoNodeError:
+ return True
+
+ if children:
+ for child in children:
+ if path == "/":
+ child_path = path + child
+ else:
+ child_path = path + "/" + child
+
+ self._delete_recursive(child_path)
+ try:
+ self.delete(path)
+ except NoNodeError: # pragma: nocover
+ pass
+
+
class TransactionRequest(object):
    """A Zookeeper Transaction Request

    A Transaction provides a builder object that can be used to
    construct and commit an atomic set of operations. The transaction
    must be committed before it is sent.

    Transactions are not thread-safe and should not be accessed from
    multiple threads at once.

    .. versionadded:: 0.6
        Requires Zookeeper 3.4+

    """
    def __init__(self, client):
        # Owning KazooClient: supplies chroot, default ACLs, handler
        # and the wire call for the final Transaction request.
        self.client = client
        self.operations = []
        self.committed = False

    def create(self, path, value=b"", acl=None, ephemeral=False,
               sequence=False):
        """Add a create ZNode to the transaction. Takes the same
        arguments as :meth:`KazooClient.create`, with the exception
        of `makepath`.

        :returns: None

        """
        if acl is None and self.client.default_acl:
            acl = self.client.default_acl

        if not isinstance(path, basestring):
            raise TypeError("path must be a string")
        # CONSISTENCY FIX: reject a bare ACL instance exactly like
        # KazooClient.create_async does; previously a single ACL
        # object slipped through this check.
        if acl and (isinstance(acl, ACL) or
                    not isinstance(acl, (tuple, list))):
            raise TypeError("acl must be a tuple/list of ACL's")
        if not isinstance(value, bytes):
            raise TypeError("value must be a byte string")
        if not isinstance(ephemeral, bool):
            raise TypeError("ephemeral must be a bool")
        if not isinstance(sequence, bool):
            raise TypeError("sequence must be a bool")

        # Create flags: bit 0 = ephemeral, bit 1 = sequence.
        flags = 0
        if ephemeral:
            flags |= 1
        if sequence:
            flags |= 2
        if acl is None:
            acl = OPEN_ACL_UNSAFE

        self._add(Create(_prefix_root(self.client.chroot, path), value, acl,
                         flags), None)

    def delete(self, path, version=-1):
        """Add a delete ZNode to the transaction. Takes the same
        arguments as :meth:`KazooClient.delete`, with the exception of
        `recursive`.

        """
        if not isinstance(path, basestring):
            raise TypeError("path must be a string")
        if not isinstance(version, int):
            raise TypeError("version must be an int")
        self._add(Delete(_prefix_root(self.client.chroot, path), version))

    def set_data(self, path, value, version=-1):
        """Add a set ZNode value to the transaction. Takes the same
        arguments as :meth:`KazooClient.set`.

        """
        if not isinstance(path, basestring):
            raise TypeError("path must be a string")
        if not isinstance(value, bytes):
            raise TypeError("value must be a byte string")
        if not isinstance(version, int):
            raise TypeError("version must be an int")
        self._add(SetData(_prefix_root(self.client.chroot, path), value,
                          version))

    def check(self, path, version):
        """Add a Check Version to the transaction.

        This command will fail and abort a transaction if the path
        does not match the specified version.

        """
        if not isinstance(path, basestring):
            raise TypeError("path must be a string")
        if not isinstance(version, int):
            raise TypeError("version must be an int")
        self._add(CheckVersion(_prefix_root(self.client.chroot, path),
                               version))

    def commit_async(self):
        """Commit the transaction asynchronously.

        :rtype: :class:`~kazoo.interfaces.IAsyncResult`

        """
        self._check_tx_state()
        # Mark committed before sending so a duplicate commit attempt
        # raises instead of double-sending.
        self.committed = True
        async_object = self.client.handler.async_result()
        self.client._call(Transaction(self.operations), async_object)
        return async_object

    def commit(self):
        """Commit the transaction.

        :returns: A list of the results for each operation in the
            transaction.

        """
        return self.commit_async().get()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        """Commit and cleanup accumulated transaction data."""
        # Commit only on a clean exit; an in-flight exception aborts
        # the transaction without sending anything.
        if not exc_type:
            self.commit()

    def _check_tx_state(self):
        # Transactions are single-use; guard against reuse.
        if self.committed:
            raise ValueError('Transaction already committed')

    def _add(self, request, post_processor=None):
        self._check_tx_state()
        self.client.logger.log(BLATHER, 'Added %r to %r', request, self)
        self.operations.append(request)
diff --git a/slider-agent/src/main/python/kazoo/exceptions.py b/slider-agent/src/main/python/kazoo/exceptions.py
new file mode 100644
index 0000000..9c9e71d
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/exceptions.py
@@ -0,0 +1,200 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+"""Kazoo Exceptions"""
+from collections import defaultdict
+
+
# Exception hierarchy: KazooException is the library-wide root;
# ZookeeperError roots the server-originated, error-code-mapped
# exceptions registered further below via _zookeeper_exception.
class KazooException(Exception):
    """Base Kazoo exception that all other kazoo library exceptions
    inherit from"""


class ZookeeperError(KazooException):
    """Base Zookeeper exception for errors originating from the
    Zookeeper server"""


class CancelledError(KazooException):
    """Raised when a process is cancelled by another thread"""


class ConfigurationError(KazooException):
    """Raised if the configuration arguments to an object are
    invalid"""


class ZookeeperStoppedError(KazooException):
    """Raised when the kazoo client stopped (and thus not connected)"""


class ConnectionDropped(KazooException):
    """Internal error for jumping out of loops"""


class LockTimeout(KazooException):
    """Raised if failed to acquire a lock.

    .. versionadded:: 1.1
    """


class WriterNotClosedException(KazooException):
    """Raised if the writer is unable to stop closing when requested.

    .. versionadded:: 1.2
    """
+
+
def _invalid_error_code():
    # defaultdict factory: looking up an unregistered Zookeeper error
    # code is a programming error, so fail loudly rather than return a
    # placeholder exception class.
    raise RuntimeError('Invalid error code')


# Maps numeric Zookeeper error codes to callables that build the
# corresponding exception instance (populated by _zookeeper_exception).
EXCEPTIONS = defaultdict(_invalid_error_code)
+
+
def _zookeeper_exception(code):
    # Class decorator: registers ``klass`` in EXCEPTIONS under the
    # numeric Zookeeper error ``code`` and stamps the code onto the
    # class for reverse lookup.
    def decorator(klass):
        def create(*args, **kwargs):
            # NOTE(review): passes the args tuple and kwargs dict as two
            # positional arguments rather than unpacking them
            # (``klass(*args, **kwargs)``), so the created exception's
            # ``args`` is ``((...), {...})``. This mirrors upstream
            # kazoo -- confirm before "fixing".
            return klass(args, kwargs)

        EXCEPTIONS[code] = create
        klass.code = code
        return klass

    return decorator
+
+
+@_zookeeper_exception(0)
+class RolledBackError(ZookeeperError):
+ pass
+
+
+@_zookeeper_exception(-1)
+class SystemZookeeperError(ZookeeperError):
+ pass
+
+
+@_zookeeper_exception(-2)
+class RuntimeInconsistency(ZookeeperError):
+ pass
+
+
+@_zookeeper_exception(-3)
+class DataInconsistency(ZookeeperError):
+ pass
+
+
+@_zookeeper_exception(-4)
+class ConnectionLoss(ZookeeperError):
+ pass
+
+
+@_zookeeper_exception(-5)
+class MarshallingError(ZookeeperError):
+ pass
+
+
+@_zookeeper_exception(-6)
+class UnimplementedError(ZookeeperError):
+ pass
+
+
+@_zookeeper_exception(-7)
+class OperationTimeoutError(ZookeeperError):
+ pass
+
+
+@_zookeeper_exception(-8)
+class BadArgumentsError(ZookeeperError):
+ pass
+
+
+@_zookeeper_exception(-100)
+class APIError(ZookeeperError):
+ pass
+
+
+@_zookeeper_exception(-101)
+class NoNodeError(ZookeeperError):
+ pass
+
+
+@_zookeeper_exception(-102)
+class NoAuthError(ZookeeperError):
+ pass
+
+
+@_zookeeper_exception(-103)
+class BadVersionError(ZookeeperError):
+ pass
+
+
+@_zookeeper_exception(-108)
+class NoChildrenForEphemeralsError(ZookeeperError):
+ pass
+
+
+@_zookeeper_exception(-110)
+class NodeExistsError(ZookeeperError):
+ pass
+
+
+@_zookeeper_exception(-111)
+class NotEmptyError(ZookeeperError):
+ pass
+
+
+@_zookeeper_exception(-112)
+class SessionExpiredError(ZookeeperError):
+ pass
+
+
+@_zookeeper_exception(-113)
+class InvalidCallbackError(ZookeeperError):
+ pass
+
+
+@_zookeeper_exception(-114)
+class InvalidACLError(ZookeeperError):
+ pass
+
+
+@_zookeeper_exception(-115)
+class AuthFailedError(ZookeeperError):
+ pass
+
+
+@_zookeeper_exception(-118)
+class SessionMovedError(ZookeeperError):
+ pass
+
+
+@_zookeeper_exception(-119)
+class NotReadOnlyCallError(ZookeeperError):
+ """An API call that is not read-only was used while connected to
+ a read-only server"""
+
+
+class ConnectionClosedError(SessionExpiredError):
+ """Connection is closed"""
+
+
+# BW Compat aliases for C lib style exceptions
+ConnectionLossException = ConnectionLoss
+MarshallingErrorException = MarshallingError
+SystemErrorException = SystemZookeeperError
+RuntimeInconsistencyException = RuntimeInconsistency
+DataInconsistencyException = DataInconsistency
+UnimplementedException = UnimplementedError
+OperationTimeoutException = OperationTimeoutError
+BadArgumentsException = BadArgumentsError
+ApiErrorException = APIError
+NoNodeException = NoNodeError
+NoAuthException = NoAuthError
+BadVersionException = BadVersionError
+NoChildrenForEphemeralsException = NoChildrenForEphemeralsError
+NodeExistsException = NodeExistsError
+InvalidACLException = InvalidACLError
+AuthFailedException = AuthFailedError
+NotEmptyException = NotEmptyError
+SessionExpiredException = SessionExpiredError
+InvalidCallbackException = InvalidCallbackError
diff --git a/slider-agent/src/main/python/kazoo/handlers/__init__.py b/slider-agent/src/main/python/kazoo/handlers/__init__.py
new file mode 100644
index 0000000..a7bacf3
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/handlers/__init__.py
@@ -0,0 +1,2 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+#
diff --git a/slider-agent/src/main/python/kazoo/handlers/gevent.py b/slider-agent/src/main/python/kazoo/handlers/gevent.py
new file mode 100644
index 0000000..060320c
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/handlers/gevent.py
@@ -0,0 +1,165 @@
+"""
+license: Apache License 2.0, see LICENSE for more details.
+A gevent based handler.
+"""
+
+from __future__ import absolute_import
+
+import atexit
+import logging
+
+import gevent
+import gevent.event
+import gevent.queue
+import gevent.select
+import gevent.thread
+
+from gevent.queue import Empty
+from gevent.queue import Queue
+from gevent import socket
+try:
+ from gevent.lock import Semaphore, RLock
+except ImportError:
+ from gevent.coros import Semaphore, RLock
+
+from kazoo.handlers.utils import create_tcp_socket, create_tcp_connection
+
+_using_libevent = gevent.__version__.startswith('0.')
+
+log = logging.getLogger(__name__)
+
+_STOP = object()
+
+AsyncResult = gevent.event.AsyncResult
+
+
class SequentialGeventHandler(object):
    """Gevent handler for sequentially executing callbacks.

    This handler executes callbacks in a sequential manner. A queue is
    created for each of the callback events, so that each type of event
    has its callback type run sequentially.

    Each queue type has a greenlet worker that pulls the callback event
    off the queue and runs it in the order the client sees it.

    This split helps ensure that watch callbacks won't block session
    re-establishment should the connection be lost during a Zookeeper
    client call.

    Watch callbacks should avoid blocking behavior as the next callback
    of that type won't be run until it completes. If you need to block,
    spawn a new greenlet and return immediately so callbacks can
    proceed.

    """
    name = "sequential_gevent_handler"
    sleep_func = staticmethod(gevent.sleep)

    def __init__(self):
        """Create a :class:`SequentialGeventHandler` instance"""
        self.callback_queue = Queue()
        self._running = False
        # NOTE(review): _async is never read or written elsewhere in
        # this class -- confirm it is required before removing.
        self._async = None
        self._state_change = Semaphore()
        self._workers = []

    class timeout_exception(gevent.event.Timeout):
        # Handler-specific timeout type; carries a message as the
        # exception payload instead of gevent's default Timeout text.
        def __init__(self, msg):
            gevent.event.Timeout.__init__(self, exception=msg)

    def _create_greenlet_worker(self, queue):
        # Spawn a greenlet that drains ``queue``, running each queued
        # callable in order until the _STOP sentinel is seen.
        def greenlet_worker():
            while True:
                try:
                    func = queue.get()
                    if func is _STOP:
                        break
                    func()
                except Empty:
                    continue
                except Exception as exc:
                    # A failing callback must not kill the worker; log
                    # and keep draining.
                    log.warning("Exception in worker greenlet")
                    log.exception(exc)
        return gevent.spawn(greenlet_worker)

    def start(self):
        """Start the greenlet workers."""
        with self._state_change:
            if self._running:
                return

            self._running = True

            # Spawn our worker greenlets, we have
            # - A callback worker for watch events to be called
            for queue in (self.callback_queue,):
                w = self._create_greenlet_worker(queue)
                self._workers.append(w)
            # Ensure workers are shut down when the interpreter exits.
            atexit.register(self.stop)

    def stop(self):
        """Stop the greenlet workers and empty all queues."""
        with self._state_change:
            if not self._running:
                return

            self._running = False

            # Wake each worker with the sentinel so it exits its loop.
            for queue in (self.callback_queue,):
                queue.put(_STOP)

            while self._workers:
                worker = self._workers.pop()
                worker.join()

            # Clear the queues
            self.callback_queue = Queue()  # pragma: nocover

            # Python 3 only; Python 2's atexit has no unregister.
            if hasattr(atexit, "unregister"):
                atexit.unregister(self.stop)

    def select(self, *args, **kwargs):
        # gevent-cooperative equivalent of select.select.
        return gevent.select.select(*args, **kwargs)

    def socket(self, *args, **kwargs):
        # NOTE(review): any arguments passed here are ignored; a fresh
        # AF_INET TCP socket is always created.
        return create_tcp_socket(socket)

    def create_connection(self, *args, **kwargs):
        # Open a TCP connection through gevent's green socket module.
        return create_tcp_connection(socket, *args, **kwargs)

    def event_object(self):
        """Create an appropriate Event object"""
        return gevent.event.Event()

    def lock_object(self):
        """Create an appropriate Lock object"""
        return gevent.thread.allocate_lock()

    def rlock_object(self):
        """Create an appropriate RLock object"""
        return RLock()

    def async_result(self):
        """Create a :class:`AsyncResult` instance

        The :class:`AsyncResult` instance will have its completion
        callbacks executed in the thread the
        :class:`SequentialGeventHandler` is created in (which should be
        the gevent/main thread).

        """
        return AsyncResult()

    def spawn(self, func, *args, **kwargs):
        """Spawn a function to run asynchronously"""
        return gevent.spawn(func, *args, **kwargs)

    def dispatch_callback(self, callback):
        """Dispatch to the callback object

        The callback is put on separate queues to run depending on the
        type as documented for the :class:`SequentialGeventHandler`.

        """
        self.callback_queue.put(lambda: callback.func(*callback.args))
diff --git a/slider-agent/src/main/python/kazoo/handlers/threading.py b/slider-agent/src/main/python/kazoo/handlers/threading.py
new file mode 100644
index 0000000..3ca9a8f
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/handlers/threading.py
@@ -0,0 +1,289 @@
+"""
+license: Apache License 2.0, see LICENSE for more details.
+A threading based handler.
+
+The :class:`SequentialThreadingHandler` is intended for regular Python
+environments that use threads.
+
+.. warning::
+
+ Do not use :class:`SequentialThreadingHandler` with applications
+ using asynchronous event loops (like gevent). Use the
+ :class:`~kazoo.handlers.gevent.SequentialGeventHandler` instead.
+
+"""
+from __future__ import absolute_import
+
+import atexit
+import logging
+import select
+import socket
+import threading
+import time
+
+try:
+ import Queue
+except ImportError: # pragma: nocover
+ import queue as Queue
+
+from kazoo.handlers.utils import create_tcp_socket, create_tcp_connection
+
+# sentinel objects
+_NONE = object()
+_STOP = object()
+
+log = logging.getLogger(__name__)
+
+
class TimeoutError(Exception):
    """Raised when a blocking wait for an async result times out."""
    pass
+
+
class AsyncResult(object):
    """A one-time event that stores a value or an exception.

    Producers call :meth:`set` or :meth:`set_exception` once; consumers
    retrieve the outcome with :meth:`get`/:meth:`wait` or subscribe via
    :meth:`rawlink`. Callbacks are never run inline: they are pushed
    onto the owning handler's ``completion_queue`` and executed by its
    completion worker thread.
    """
    def __init__(self, handler):
        # handler: object exposing ``completion_queue`` (normally the
        # SequentialThreadingHandler that created this result).
        self._handler = handler
        self.value = None
        # _NONE sentinel => no outcome yet; None => a value was set;
        # anything else => the stored exception instance.
        self._exception = _NONE
        self._condition = threading.Condition()
        self._callbacks = []

    def ready(self):
        """Return true if and only if it holds a value or an
        exception"""
        return self._exception is not _NONE

    def successful(self):
        """Return true if and only if it is ready and holds a value"""
        return self._exception is None

    @property
    def exception(self):
        # Public view of the stored exception; None while unresolved.
        if self._exception is not _NONE:
            return self._exception

    def set(self, value=None):
        """Store the value. Wake up the waiters."""
        with self._condition:
            self.value = value
            self._exception = None

            for callback in self._callbacks:
                # Bind the current callback through a default argument:
                # a bare ``lambda: callback(self)`` late-binds the loop
                # variable, so every queued entry would invoke only the
                # *last* registered callback.
                self._handler.completion_queue.put(
                    lambda cb=callback: cb(self)
                )
            self._condition.notify_all()

    def set_exception(self, exception):
        """Store the exception. Wake up the waiters."""
        with self._condition:
            self._exception = exception

            for callback in self._callbacks:
                # Same default-argument binding as in set() to avoid the
                # late-binding closure bug.
                self._handler.completion_queue.put(
                    lambda cb=callback: cb(self)
                )
            self._condition.notify_all()

    def get(self, block=True, timeout=None):
        """Return the stored value or raise the exception.

        :param block: wait for an outcome when none is available yet.
        :param timeout: maximum seconds to wait when blocking.
        :raises TimeoutError: if no outcome is available in time (or
            immediately when ``block`` is false and nothing is set).
        """
        with self._condition:
            if self._exception is not _NONE:
                if self._exception is None:
                    return self.value
                raise self._exception
            elif block:
                self._condition.wait(timeout)
                if self._exception is not _NONE:
                    if self._exception is None:
                        return self.value
                    raise self._exception

            # if we get to this point we timeout
            raise TimeoutError()

    def get_nowait(self):
        """Return the value or raise the exception without blocking.

        If nothing is available, raises TimeoutError

        """
        return self.get(block=False)

    def wait(self, timeout=None):
        """Block until the instance is ready; return whether it is."""
        with self._condition:
            # Only wait while unresolved: the notify_all() for an
            # already-set result happened in the past, so waiting
            # unconditionally here would block until the timeout (or
            # forever with timeout=None).
            if self._exception is _NONE:
                self._condition.wait(timeout)
            return self._exception is not _NONE

    def rawlink(self, callback):
        """Register a callback to call when a value or an exception is
        set"""
        with self._condition:
            # Are we already set? Dispatch it now
            if self.ready():
                self._handler.completion_queue.put(
                    lambda: callback(self)
                )
                return

            if callback not in self._callbacks:
                self._callbacks.append(callback)

    def unlink(self, callback):
        """Remove the callback set by :meth:`rawlink`"""
        with self._condition:
            if self.ready():
                # Already triggered, ignore
                return

            if callback in self._callbacks:
                self._callbacks.remove(callback)
+
+
class SequentialThreadingHandler(object):
    """Threading handler for sequentially executing callbacks.

    This handler executes callbacks in a sequential manner. A queue is
    created for each of the callback events, so that each type of event
    has its callback type run sequentially. These are split into two
    queues, one for watch events and one for async result completion
    callbacks.

    Each queue type has a thread worker that pulls the callback event
    off the queue and runs it in the order the client sees it.

    This split helps ensure that watch callbacks won't block session
    re-establishment should the connection be lost during a Zookeeper
    client call.

    Watch and completion callbacks should avoid blocking behavior as
    the next callback of that type won't be run until it completes. If
    you need to block, spawn a new thread and return immediately so
    callbacks can proceed.

    .. note::

        Completion callbacks can block to wait on Zookeeper calls, but
        no other completion callbacks will execute until the callback
        returns.

    """
    name = "sequential_threading_handler"
    timeout_exception = TimeoutError
    sleep_func = staticmethod(time.sleep)
    # Queue implementation indirection lets subclasses or tests swap in
    # a different queue type.
    queue_impl = Queue.Queue
    queue_empty = Queue.Empty

    def __init__(self):
        """Create a :class:`SequentialThreadingHandler` instance"""
        self.callback_queue = self.queue_impl()
        self.completion_queue = self.queue_impl()
        self._running = False
        self._state_change = threading.Lock()
        self._workers = []

    def _create_thread_worker(self, queue):
        # Build and start a daemon thread that drains ``queue``, running
        # each queued callable until the _STOP sentinel is seen.
        def thread_worker():  # pragma: nocover
            while True:
                try:
                    func = queue.get()
                    try:
                        if func is _STOP:
                            break
                        func()
                    except Exception:
                        # A failing callback must not kill the worker.
                        log.exception("Exception in worker queue thread")
                    finally:
                        # Always mark the item done so a queue.join()
                        # elsewhere cannot hang on a failed callback.
                        queue.task_done()
                except self.queue_empty:
                    continue
        t = threading.Thread(target=thread_worker)

        # Even though these should be joined, it's possible stop might
        # not issue in time so we set them to daemon to let the program
        # exit anyways
        t.daemon = True
        t.start()
        return t

    def start(self):
        """Start the worker threads."""
        with self._state_change:
            if self._running:
                return

            # Spawn our worker threads, we have
            # - A callback worker for watch events to be called
            # - A completion worker for completion events to be called
            for queue in (self.completion_queue, self.callback_queue):
                w = self._create_thread_worker(queue)
                self._workers.append(w)
            self._running = True
            # Ensure workers are shut down when the interpreter exits.
            atexit.register(self.stop)

    def stop(self):
        """Stop the worker threads and empty all queues."""
        with self._state_change:
            if not self._running:
                return

            self._running = False

            # Wake each worker with the sentinel so it exits its loop.
            for queue in (self.completion_queue, self.callback_queue):
                queue.put(_STOP)

            # reverse() + pop()-from-the-end joins the workers in the
            # order they were created (completion worker first).
            self._workers.reverse()
            while self._workers:
                worker = self._workers.pop()
                worker.join()

            # Clear the queues
            self.callback_queue = self.queue_impl()
            self.completion_queue = self.queue_impl()
            # Python 3 only; Python 2's atexit has no unregister.
            if hasattr(atexit, "unregister"):
                atexit.unregister(self.stop)

    def select(self, *args, **kwargs):
        # Plain blocking select.select; no green/cooperative variant.
        return select.select(*args, **kwargs)

    def socket(self):
        # Fresh AF_INET TCP socket with TCP_NODELAY and CLOEXEC set.
        return create_tcp_socket(socket)

    def create_connection(self, *args, **kwargs):
        # Open a TCP connection through the stdlib socket module.
        return create_tcp_connection(socket, *args, **kwargs)

    def event_object(self):
        """Create an appropriate Event object"""
        return threading.Event()

    def lock_object(self):
        """Create a lock object"""
        return threading.Lock()

    def rlock_object(self):
        """Create an appropriate RLock object"""
        return threading.RLock()

    def async_result(self):
        """Create a :class:`AsyncResult` instance"""
        return AsyncResult(self)

    def spawn(self, func, *args, **kwargs):
        # Run ``func`` on a daemon thread; returns the Thread object.
        t = threading.Thread(target=func, args=args, kwargs=kwargs)
        t.daemon = True
        t.start()
        return t

    def dispatch_callback(self, callback):
        """Dispatch to the callback object

        The callback is put on separate queues to run depending on the
        type as documented for the :class:`SequentialThreadingHandler`.

        """
        self.callback_queue.put(lambda: callback.func(*callback.args))
diff --git a/slider-agent/src/main/python/kazoo/handlers/utils.py b/slider-agent/src/main/python/kazoo/handlers/utils.py
new file mode 100644
index 0000000..60d6404
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/handlers/utils.py
@@ -0,0 +1,94 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+"""Kazoo handler helpers"""
+
+HAS_FNCTL = True
+try:
+ import fcntl
+except ImportError: # pragma: nocover
+ HAS_FNCTL = False
+import functools
+import os
+
+
def _set_fd_cloexec(fd):
    # Mark the descriptor close-on-exec so it does not leak into child
    # processes spawned by the application.
    flags = fcntl.fcntl(fd, fcntl.F_GETFD)
    fcntl.fcntl(fd, fcntl.F_SETFD, flags | fcntl.FD_CLOEXEC)
+
+
def _set_default_tcpsock_options(module, sock):
    # Disable Nagle's algorithm: the client exchanges many small
    # request/response packets and benefits from immediate sends.
    sock.setsockopt(module.IPPROTO_TCP, module.TCP_NODELAY, 1)
    # fcntl is unavailable on some platforms (e.g. Windows); skip the
    # close-on-exec flag there.
    if HAS_FNCTL:
        _set_fd_cloexec(sock)
    return sock
+
+
def create_pipe():
    """Create a non-blocking read/write pipe.

    Returns a ``(read_fd, write_fd)`` pair of raw file descriptors.
    Where fcntl is available both ends are set non-blocking and
    close-on-exec.
    """
    r, w = os.pipe()
    if HAS_FNCTL:
        fcntl.fcntl(r, fcntl.F_SETFL, os.O_NONBLOCK)
        fcntl.fcntl(w, fcntl.F_SETFL, os.O_NONBLOCK)
        _set_fd_cloexec(r)
        _set_fd_cloexec(w)
    return r, w
+
+
def create_tcp_socket(module):
    """Create a TCP socket with the CLOEXEC flag set.

    :param module: a socket-compatible module (stdlib ``socket`` or a
        green variant) supplying the constants and socket class.
    """
    type_ = module.SOCK_STREAM
    if hasattr(module, 'SOCK_CLOEXEC'):  # pragma: nocover
        # if available, set cloexec flag during socket creation
        type_ |= module.SOCK_CLOEXEC
    sock = module.socket(module.AF_INET, type_)
    _set_default_tcpsock_options(module, sock)
    return sock
+
+
def create_tcp_connection(module, address, timeout=None):
    """Connect to ``address`` and apply the default TCP options.

    :param module: a socket-compatible module providing
        ``create_connection``.
    :param address: ``(host, port)`` tuple.
    :param timeout: connect timeout in seconds; ``None`` selects the
        module's global default.
    """
    if timeout is None:
        # thanks to create_connection() developers for
        # this ugliness...
        # NOTE(review): relies on the private _GLOBAL_DEFAULT_TIMEOUT
        # sentinel; there is no public way to request the default.
        timeout = module._GLOBAL_DEFAULT_TIMEOUT

    sock = module.create_connection(address, timeout)
    _set_default_tcpsock_options(module, sock)
    return sock
+
+
def capture_exceptions(async_result):
    """Decorator factory routing exceptions from the wrapped function
    into ``async_result`` instead of letting them propagate.

    :param async_result: An async result implementing
        :class:`IAsyncResult`; receives any raised exception via its
        ``set_exception`` method.
    """
    def capture(function):
        @functools.wraps(function)
        def captured_function(*args, **kwargs):
            try:
                result = function(*args, **kwargs)
            except Exception as exc:
                async_result.set_exception(exc)
                return None
            return result
        return captured_function
    return capture
+ return capture
+
+
def wrap(async_result):
    """Decorator factory propagating the wrapped function's outcome to
    ``async_result``.

    Exceptions are forwarded via ``set_exception`` (through
    :func:`capture_exceptions`); a non-None return value is forwarded
    via ``set``. NOTE: a None return value is *not* propagated.

    :param async_result: An async result implementing
        :class:`IAsyncResult`
    """
    def capture(function):
        @capture_exceptions(async_result)
        def captured_function(*args, **kwargs):
            result = function(*args, **kwargs)
            if result is not None:
                async_result.set(result)
            return result
        return captured_function
    return capture
diff --git a/slider-agent/src/main/python/kazoo/hosts.py b/slider-agent/src/main/python/kazoo/hosts.py
new file mode 100644
index 0000000..ca0dd35
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/hosts.py
@@ -0,0 +1,27 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+import random
+
+try:
+ from urlparse import urlsplit
+except ImportError:
+ # try python3 then
+ from urllib.parse import urlsplit
+
def collect_hosts(hosts, randomize=True):
    """Parse a Zookeeper connection string into hosts and chroot.

    :param hosts: ``"host1:port1,host2,..."`` optionally followed by a
        single ``/chroot/path`` suffix applying to the whole ensemble.
    :param randomize: When True, shuffle the host list so clients
        spread their connections across the ensemble.
    :returns: ``([(host, port), ...], chroot_or_None)``; the default
        port 2181 is used where none is given.
    """
    host_ports, chroot = hosts.partition("/")[::2]
    chroot = "/" + chroot if chroot else None

    collected = []
    for spec in host_ports.split(","):
        # Let urlsplit cope with IPv4 & IPv6 host:port forms by
        # prefixing a dummy scheme.
        parsed = urlsplit("xxx://" + spec)
        port = int(parsed.port) if parsed.port else 2181
        collected.append((parsed.hostname.strip(), port))

    if randomize:
        random.shuffle(collected)

    return collected, chroot
diff --git a/slider-agent/src/main/python/kazoo/interfaces.py b/slider-agent/src/main/python/kazoo/interfaces.py
new file mode 100644
index 0000000..7aff561
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/interfaces.py
@@ -0,0 +1,204 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+"""Kazoo Interfaces
+
+.. versionchanged:: 1.4
+
+ The classes in this module used to be interface declarations based on
+ `zope.interface.Interface`. They were converted to normal classes and
+ now serve as documentation only.
+
+"""
+
+# public API
+
+
+class IHandler(object):
+ """A Callback Handler for Zookeeper completion and watch callbacks.
+
+ This object must implement several methods responsible for
+ determining how completion / watch callbacks are handled as well as
+ the method for calling :class:`IAsyncResult` callback functions.
+
+ These functions are used to abstract differences between a Python
+ threading environment and asynchronous single-threaded environments
+ like gevent. The minimum functionality needed for Kazoo to handle
+ these differences is encompassed in this interface.
+
+ The Handler should document how callbacks are called for:
+
+ * Zookeeper completion events
+ * Zookeeper watch events
+
+ .. attribute:: name
+
+ Human readable name of the Handler interface.
+
+ .. attribute:: timeout_exception
+
+ Exception class that should be thrown and captured if a
+ result is not available within the given time.
+
+ .. attribute:: sleep_func
+
+ Appropriate sleep function that can be called with a single
+ argument and sleep.
+
+ """
+
+ def start(self):
+ """Start the handler, used for setting up the handler."""
+
+ def stop(self):
+ """Stop the handler. Should block until the handler is safely
+ stopped."""
+
+ def select(self):
+ """A select method that implements Python's select.select
+ API"""
+
+ def socket(self):
+ """A socket method that implements Python's socket.socket
+ API"""
+
+ def create_connection(self):
+ """A socket method that implements Python's
+ socket.create_connection API"""
+
+ def event_object(self):
+ """Return an appropriate object that implements Python's
+ threading.Event API"""
+
+ def lock_object(self):
+ """Return an appropriate object that implements Python's
+ threading.Lock API"""
+
+ def rlock_object(self):
+ """Return an appropriate object that implements Python's
+ threading.RLock API"""
+
+ def async_result(self):
+ """Return an instance that conforms to the
+ :class:`~IAsyncResult` interface appropriate for this
+ handler"""
+
+ def spawn(self, func, *args, **kwargs):
+ """Spawn a function to run asynchronously
+
+ :param args: args to call the function with.
+ :param kwargs: keyword args to call the function with.
+
+ This method should return immediately and execute the function
+ with the provided args and kwargs in an asynchronous manner.
+
+ """
+
+ def dispatch_callback(self, callback):
+ """Dispatch to the callback object
+
+ :param callback: A :class:`~kazoo.protocol.states.Callback`
+ object to be called.
+
+ """
+
+
+class IAsyncResult(object):
+ """An Async Result object that can be queried for a value that has
+ been set asynchronously.
+
+ This object is modeled on the ``gevent`` AsyncResult object.
+
+ The implementation must account for the fact that the :meth:`set`
+ and :meth:`set_exception` methods will be called from within the
+ Zookeeper thread which may require extra care under asynchronous
+ environments.
+
+ .. attribute:: value
+
+ Holds the value passed to :meth:`set` if :meth:`set` was
+ called. Otherwise `None`.
+
+ .. attribute:: exception
+
+ Holds the exception instance passed to :meth:`set_exception`
+ if :meth:`set_exception` was called. Otherwise `None`.
+
+ """
+
+ def ready(self):
+ """Return `True` if and only if it holds a value or an
+ exception"""
+
+ def successful(self):
+ """Return `True` if and only if it is ready and holds a
+ value"""
+
+ def set(self, value=None):
+ """Store the value. Wake up the waiters.
+
+ :param value: Value to store as the result.
+
+ Any waiters blocking on :meth:`get` or :meth:`wait` are woken
+ up. Sequential calls to :meth:`wait` and :meth:`get` will not
+ block at all."""
+
+ def set_exception(self, exception):
+ """Store the exception. Wake up the waiters.
+
+ :param exception: Exception to raise when fetching the value.
+
+ Any waiters blocking on :meth:`get` or :meth:`wait` are woken
+ up. Sequential calls to :meth:`wait` and :meth:`get` will not
+ block at all."""
+
+ def get(self, block=True, timeout=None):
+ """Return the stored value or raise the exception
+
+ :param block: Whether this method should block or return
+ immediately.
+ :type block: bool
+ :param timeout: How long to wait for a value when `block` is
+ `True`.
+ :type timeout: float
+
+ If this instance already holds a value / an exception, return /
+ raise it immediately. Otherwise, block until :meth:`set` or
+ :meth:`set_exception` has been called or until the optional
+ timeout occurs."""
+
+ def get_nowait(self):
+ """Return the value or raise the exception without blocking.
+
+ If nothing is available, raise the Timeout exception class on
+ the associated :class:`IHandler` interface."""
+
+ def wait(self, timeout=None):
+ """Block until the instance is ready.
+
+ :param timeout: How long to wait for a value when `block` is
+ `True`.
+ :type timeout: float
+
+ If this instance already holds a value / an exception, return /
+ raise it immediately. Otherwise, block until :meth:`set` or
+ :meth:`set_exception` has been called or until the optional
+ timeout occurs."""
+
+ def rawlink(self, callback):
+ """Register a callback to call when a value or an exception is
+ set
+
+ :param callback:
+ A callback function to call after :meth:`set` or
+ :meth:`set_exception` has been called. This function will
+ be passed a single argument, this instance.
+ :type callback: func
+
+ """
+
+ def unlink(self, callback):
+ """Remove the callback set by :meth:`rawlink`
+
+ :param callback: A callback function to remove.
+ :type callback: func
+
+ """
diff --git a/slider-agent/src/main/python/kazoo/loggingsupport.py b/slider-agent/src/main/python/kazoo/loggingsupport.py
new file mode 100644
index 0000000..91ae632
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/loggingsupport.py
@@ -0,0 +1,3 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+BLATHER = 5 # log level for low-level debugging
+
diff --git a/slider-agent/src/main/python/kazoo/protocol/__init__.py b/slider-agent/src/main/python/kazoo/protocol/__init__.py
new file mode 100644
index 0000000..a7bacf3
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/protocol/__init__.py
@@ -0,0 +1,2 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+#
diff --git a/slider-agent/src/main/python/kazoo/protocol/connection.py b/slider-agent/src/main/python/kazoo/protocol/connection.py
new file mode 100644
index 0000000..3cbb87f
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/protocol/connection.py
@@ -0,0 +1,624 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+"""Zookeeper Protocol Connection Handler"""
+import logging
+import os
+import random
+import select
+import socket
+import sys
+import time
+from binascii import hexlify
+from contextlib import contextmanager
+
+from kazoo.exceptions import (
+ AuthFailedError,
+ ConnectionDropped,
+ EXCEPTIONS,
+ SessionExpiredError,
+ NoNodeError
+)
+from kazoo.handlers.utils import create_pipe
+from kazoo.loggingsupport import BLATHER
+from kazoo.protocol.serialization import (
+ Auth,
+ Close,
+ Connect,
+ Exists,
+ GetChildren,
+ Ping,
+ PingInstance,
+ ReplyHeader,
+ Transaction,
+ Watch,
+ int_struct
+)
+from kazoo.protocol.states import (
+ Callback,
+ KeeperState,
+ WatchedEvent,
+ EVENT_TYPE_MAP,
+)
+from kazoo.retry import (
+ ForceRetryError,
+ RetryFailedError
+)
+
+log = logging.getLogger(__name__)
+
+
+# Special testing hook objects used to force a session expired error as
+# if it came from the server
+_SESSION_EXPIRED = object()
+_CONNECTION_DROP = object()
+
+STOP_CONNECTING = object()
+
+CREATED_EVENT = 1
+DELETED_EVENT = 2
+CHANGED_EVENT = 3
+CHILD_EVENT = 4
+
+WATCH_XID = -1
+PING_XID = -2
+AUTH_XID = -4
+
+CLOSE_RESPONSE = Close.type
+
+if sys.version_info > (3, ): # pragma: nocover
+ def buffer(obj, offset=0):
+ return memoryview(obj)[offset:]
+
+ advance_iterator = next
+else: # pragma: nocover
+ def advance_iterator(it):
+ return it.next()
+
+
+class RWPinger(object):
+ """A Read/Write Server Pinger Iterable
+
+ This object is initialized with the hosts iterator object and the
+ socket creation function. Anytime `next` is called on its iterator
+ it yields either False, or a host, port tuple if it found a r/w
+ capable Zookeeper node.
+
+ After the first run-through of hosts, an exponential back-off delay
+ is added before the next run. This delay is tracked internally and
+ the iterator will yield False if called too soon.
+
+ """
+ def __init__(self, hosts, connection_func, socket_handling):
+ self.hosts = hosts
+ self.connection = connection_func
+ self.last_attempt = None
+ self.socket_handling = socket_handling
+
+ def __iter__(self):
+ if not self.last_attempt:
+ self.last_attempt = time.time()
+ delay = 0.5
+ while True:
+ yield self._next_server(delay)
+
+ def _next_server(self, delay):
+ jitter = random.randint(0, 100) / 100.0
+ while time.time() < self.last_attempt + delay + jitter:
+            # Skip r/w ping checks if it's too soon
+ return False
+ for host, port in self.hosts:
+ log.debug("Pinging server for r/w: %s:%s", host, port)
+ self.last_attempt = time.time()
+ try:
+ with self.socket_handling():
+ sock = self.connection((host, port))
+ sock.sendall(b"isro")
+ result = sock.recv(8192)
+ sock.close()
+ if result == b'rw':
+ return (host, port)
+ else:
+ return False
+ except ConnectionDropped:
+ return False
+
+ # Add some jitter between host pings
+ while time.time() < self.last_attempt + jitter:
+ return False
+ delay *= 2
+
+
+class RWServerAvailable(Exception):
+ """Thrown if a RW Server becomes available"""
+
+
+class ConnectionHandler(object):
+ """Zookeeper connection handler"""
+ def __init__(self, client, retry_sleeper, logger=None):
+ self.client = client
+ self.handler = client.handler
+ self.retry_sleeper = retry_sleeper
+ self.logger = logger or log
+
+ # Our event objects
+ self.connection_closed = client.handler.event_object()
+ self.connection_closed.set()
+ self.connection_stopped = client.handler.event_object()
+ self.connection_stopped.set()
+ self.ping_outstanding = client.handler.event_object()
+
+ self._read_pipe = None
+ self._write_pipe = None
+
+ self._socket = None
+ self._xid = None
+ self._rw_server = None
+ self._ro_mode = False
+
+ self._connection_routine = None
+
+ # This is instance specific to avoid odd thread bug issues in Python
+ # during shutdown global cleanup
+ @contextmanager
+ def _socket_error_handling(self):
+ try:
+ yield
+ except (socket.error, select.error) as e:
+ err = getattr(e, 'strerror', e)
+ raise ConnectionDropped("socket connection error: %s" % (err,))
+
+ def start(self):
+ """Start the connection up"""
+ if self.connection_closed.is_set():
+ self._read_pipe, self._write_pipe = create_pipe()
+ self.connection_closed.clear()
+ if self._connection_routine:
+ raise Exception("Unable to start, connection routine already "
+ "active.")
+ self._connection_routine = self.handler.spawn(self.zk_loop)
+
+ def stop(self, timeout=None):
+        """Ensure the writer has stopped; wait to see if it does."""
+ self.connection_stopped.wait(timeout)
+ if self._connection_routine:
+ self._connection_routine.join()
+ self._connection_routine = None
+ return self.connection_stopped.is_set()
+
+ def close(self):
+ """Release resources held by the connection
+
+ The connection can be restarted afterwards.
+ """
+ if not self.connection_stopped.is_set():
+ raise Exception("Cannot close connection until it is stopped")
+ self.connection_closed.set()
+ wp, rp = self._write_pipe, self._read_pipe
+ self._write_pipe = self._read_pipe = None
+ if wp is not None:
+ os.close(wp)
+ if rp is not None:
+ os.close(rp)
+
+ def _server_pinger(self):
+ """Returns a server pinger iterable, that will ping the next
+ server in the list, and apply a back-off between attempts."""
+ return RWPinger(self.client.hosts, self.handler.create_connection,
+ self._socket_error_handling)
+
+ def _read_header(self, timeout):
+ b = self._read(4, timeout)
+ length = int_struct.unpack(b)[0]
+ b = self._read(length, timeout)
+ header, offset = ReplyHeader.deserialize(b, 0)
+ return header, b, offset
+
+ def _read(self, length, timeout):
+ msgparts = []
+ remaining = length
+ with self._socket_error_handling():
+ while remaining > 0:
+ s = self.handler.select([self._socket], [], [], timeout)[0]
+ if not s: # pragma: nocover
+ # If the read list is empty, we got a timeout. We don't
+ # have to check wlist and xlist as we don't set any
+ raise self.handler.timeout_exception("socket time-out")
+
+ chunk = self._socket.recv(remaining)
+ if chunk == b'':
+ raise ConnectionDropped('socket connection broken')
+ msgparts.append(chunk)
+ remaining -= len(chunk)
+ return b"".join(msgparts)
+
+ def _invoke(self, timeout, request, xid=None):
+ """A special writer used during connection establishment
+ only"""
+ self._submit(request, timeout, xid)
+ zxid = None
+ if xid:
+ header, buffer, offset = self._read_header(timeout)
+ if header.xid != xid:
+ raise RuntimeError('xids do not match, expected %r received %r',
+ xid, header.xid)
+ if header.zxid > 0:
+ zxid = header.zxid
+ if header.err:
+ callback_exception = EXCEPTIONS[header.err]()
+ self.logger.debug(
+ 'Received error(xid=%s) %r', xid, callback_exception)
+ raise callback_exception
+ return zxid
+
+ msg = self._read(4, timeout)
+ length = int_struct.unpack(msg)[0]
+ msg = self._read(length, timeout)
+
+ if hasattr(request, 'deserialize'):
+ try:
+ obj, _ = request.deserialize(msg, 0)
+ except Exception:
+ self.logger.exception("Exception raised during deserialization"
+ " of request: %s", request)
+
+ # raise ConnectionDropped so connect loop will retry
+ raise ConnectionDropped('invalid server response')
+ self.logger.log(BLATHER, 'Read response %s', obj)
+ return obj, zxid
+
+ return zxid
+
+ def _submit(self, request, timeout, xid=None):
+ """Submit a request object with a timeout value and optional
+ xid"""
+ b = bytearray()
+ if xid:
+ b.extend(int_struct.pack(xid))
+ if request.type:
+ b.extend(int_struct.pack(request.type))
+ b += request.serialize()
+ self.logger.log((BLATHER if isinstance(request, Ping) else logging.DEBUG),
+ "Sending request(xid=%s): %s", xid, request)
+ self._write(int_struct.pack(len(b)) + b, timeout)
+
+ def _write(self, msg, timeout):
+ """Write a raw msg to the socket"""
+ sent = 0
+ msg_length = len(msg)
+ with self._socket_error_handling():
+ while sent < msg_length:
+ s = self.handler.select([], [self._socket], [], timeout)[1]
+ if not s: # pragma: nocover
+ # If the write list is empty, we got a timeout. We don't
+ # have to check rlist and xlist as we don't set any
+ raise self.handler.timeout_exception("socket time-out")
+ msg_slice = buffer(msg, sent)
+ bytes_sent = self._socket.send(msg_slice)
+ if not bytes_sent:
+ raise ConnectionDropped('socket connection broken')
+ sent += bytes_sent
+
+ def _read_watch_event(self, buffer, offset):
+ client = self.client
+ watch, offset = Watch.deserialize(buffer, offset)
+ path = watch.path
+
+ self.logger.debug('Received EVENT: %s', watch)
+
+ watchers = []
+
+ if watch.type in (CREATED_EVENT, CHANGED_EVENT):
+ watchers.extend(client._data_watchers.pop(path, []))
+ elif watch.type == DELETED_EVENT:
+ watchers.extend(client._data_watchers.pop(path, []))
+ watchers.extend(client._child_watchers.pop(path, []))
+ elif watch.type == CHILD_EVENT:
+ watchers.extend(client._child_watchers.pop(path, []))
+ else:
+ self.logger.warn('Received unknown event %r', watch.type)
+ return
+
+ # Strip the chroot if needed
+ path = client.unchroot(path)
+ ev = WatchedEvent(EVENT_TYPE_MAP[watch.type], client._state, path)
+
+ # Last check to ignore watches if we've been stopped
+ if client._stopped.is_set():
+ return
+
+ # Dump the watchers to the watch thread
+ for watch in watchers:
+ client.handler.dispatch_callback(Callback('watch', watch, (ev,)))
+
+ def _read_response(self, header, buffer, offset):
+ client = self.client
+ request, async_object, xid = client._pending.popleft()
+ if header.zxid and header.zxid > 0:
+ client.last_zxid = header.zxid
+ if header.xid != xid:
+ raise RuntimeError('xids do not match, expected %r '
+ 'received %r', xid, header.xid)
+
+        # Determine if it's an exists request and a no node error
+ exists_error = (header.err == NoNodeError.code and
+ request.type == Exists.type)
+
+        # Set the exception if it's not an exists error
+ if header.err and not exists_error:
+ callback_exception = EXCEPTIONS[header.err]()
+ self.logger.debug(
+ 'Received error(xid=%s) %r', xid, callback_exception)
+ if async_object:
+ async_object.set_exception(callback_exception)
+ elif request and async_object:
+ if exists_error:
+ # It's a NoNodeError, which is fine for an exists
+ # request
+ async_object.set(None)
+ else:
+ try:
+ response = request.deserialize(buffer, offset)
+ except Exception as exc:
+ self.logger.exception("Exception raised during deserialization"
+ " of request: %s", request)
+ async_object.set_exception(exc)
+ return
+ self.logger.debug(
+ 'Received response(xid=%s): %r', xid, response)
+
+ # We special case a Transaction as we have to unchroot things
+ if request.type == Transaction.type:
+ response = Transaction.unchroot(client, response)
+
+ async_object.set(response)
+
+ # Determine if watchers should be registered
+ watcher = getattr(request, 'watcher', None)
+ if not client._stopped.is_set() and watcher:
+ if isinstance(request, GetChildren):
+ client._child_watchers[request.path].add(watcher)
+ else:
+ client._data_watchers[request.path].add(watcher)
+
+ if isinstance(request, Close):
+ self.logger.log(BLATHER, 'Read close response')
+ return CLOSE_RESPONSE
+
+ def _read_socket(self, read_timeout):
+ """Called when there's something to read on the socket"""
+ client = self.client
+
+ header, buffer, offset = self._read_header(read_timeout)
+ if header.xid == PING_XID:
+ self.logger.log(BLATHER, 'Received Ping')
+ self.ping_outstanding.clear()
+ elif header.xid == AUTH_XID:
+ self.logger.log(BLATHER, 'Received AUTH')
+
+ request, async_object, xid = client._pending.popleft()
+ if header.err:
+ async_object.set_exception(AuthFailedError())
+ client._session_callback(KeeperState.AUTH_FAILED)
+ else:
+ async_object.set(True)
+ elif header.xid == WATCH_XID:
+ self._read_watch_event(buffer, offset)
+ else:
+ self.logger.log(BLATHER, 'Reading for header %r', header)
+
+ return self._read_response(header, buffer, offset)
+
+ def _send_request(self, read_timeout, connect_timeout):
+ """Called when we have something to send out on the socket"""
+ client = self.client
+ try:
+ request, async_object = client._queue[0]
+ except IndexError:
+ # Not actually something on the queue, this can occur if
+ # something happens to cancel the request such that we
+ # don't clear the pipe below after sending
+ try:
+                # Clear a possible inconsistency (no request in the queue
+                # but data left in the read pipe), which causes the cpu to spin.
+ os.read(self._read_pipe, 1)
+ except OSError:
+ pass
+ return
+
+ # Special case for testing, if this is a _SessionExpire object
+ # then throw a SessionExpiration error as if we were dropped
+ if request is _SESSION_EXPIRED:
+ raise SessionExpiredError("Session expired: Testing")
+ if request is _CONNECTION_DROP:
+ raise ConnectionDropped("Connection dropped: Testing")
+
+ # Special case for auth packets
+ if request.type == Auth.type:
+ xid = AUTH_XID
+ else:
+ self._xid += 1
+ xid = self._xid
+
+ self._submit(request, connect_timeout, xid)
+ client._queue.popleft()
+ os.read(self._read_pipe, 1)
+ client._pending.append((request, async_object, xid))
+
+ def _send_ping(self, connect_timeout):
+ self.ping_outstanding.set()
+ self._submit(PingInstance, connect_timeout, PING_XID)
+
+ # Determine if we need to check for a r/w server
+ if self._ro_mode:
+ result = advance_iterator(self._ro_mode)
+ if result:
+ self._rw_server = result
+ raise RWServerAvailable()
+
+ def zk_loop(self):
+ """Main Zookeeper handling loop"""
+ self.logger.log(BLATHER, 'ZK loop started')
+
+ self.connection_stopped.clear()
+
+ retry = self.retry_sleeper.copy()
+ try:
+ while not self.client._stopped.is_set():
+ # If the connect_loop returns STOP_CONNECTING, stop retrying
+ if retry(self._connect_loop, retry) is STOP_CONNECTING:
+ break
+ except RetryFailedError:
+ self.logger.warning("Failed connecting to Zookeeper "
+ "within the connection retry policy.")
+ finally:
+ self.connection_stopped.set()
+ self.client._session_callback(KeeperState.CLOSED)
+ self.logger.log(BLATHER, 'Connection stopped')
+
+ def _connect_loop(self, retry):
+ # Iterate through the hosts a full cycle before starting over
+ status = None
+ for host, port in self.client.hosts:
+ if self.client._stopped.is_set():
+ status = STOP_CONNECTING
+ break
+ status = self._connect_attempt(host, port, retry)
+ if status is STOP_CONNECTING:
+ break
+
+ if status is STOP_CONNECTING:
+ return STOP_CONNECTING
+ else:
+ raise ForceRetryError('Reconnecting')
+
+ def _connect_attempt(self, host, port, retry):
+ client = self.client
+ TimeoutError = self.handler.timeout_exception
+ close_connection = False
+
+ self._socket = None
+
+ # Were we given a r/w server? If so, use that instead
+ if self._rw_server:
+ self.logger.log(BLATHER,
+ "Found r/w server to use, %s:%s", host, port)
+ host, port = self._rw_server
+ self._rw_server = None
+
+ if client._state != KeeperState.CONNECTING:
+ client._session_callback(KeeperState.CONNECTING)
+
+ try:
+ read_timeout, connect_timeout = self._connect(host, port)
+ read_timeout = read_timeout / 1000.0
+ connect_timeout = connect_timeout / 1000.0
+ retry.reset()
+ self._xid = 0
+
+ while not close_connection:
+ # Watch for something to read or send
+ jitter_time = random.randint(0, 40) / 100.0
+ # Ensure our timeout is positive
+ timeout = max([read_timeout / 2.0 - jitter_time, jitter_time])
+ s = self.handler.select([self._socket, self._read_pipe],
+ [], [], timeout)[0]
+
+ if not s:
+ if self.ping_outstanding.is_set():
+ self.ping_outstanding.clear()
+ raise ConnectionDropped(
+ "outstanding heartbeat ping not received")
+ self._send_ping(connect_timeout)
+ elif s[0] == self._socket:
+ response = self._read_socket(read_timeout)
+ close_connection = response == CLOSE_RESPONSE
+ else:
+ self._send_request(read_timeout, connect_timeout)
+
+ self.logger.info('Closing connection to %s:%s', host, port)
+ client._session_callback(KeeperState.CLOSED)
+ return STOP_CONNECTING
+ except (ConnectionDropped, TimeoutError) as e:
+ if isinstance(e, ConnectionDropped):
+ self.logger.warning('Connection dropped: %s', e)
+ else:
+ self.logger.warning('Connection time-out')
+ if client._state != KeeperState.CONNECTING:
+ self.logger.warning("Transition to CONNECTING")
+ client._session_callback(KeeperState.CONNECTING)
+ except AuthFailedError:
+ retry.reset()
+ self.logger.warning('AUTH_FAILED closing')
+ client._session_callback(KeeperState.AUTH_FAILED)
+ return STOP_CONNECTING
+ except SessionExpiredError:
+ retry.reset()
+ self.logger.warning('Session has expired')
+ client._session_callback(KeeperState.EXPIRED_SESSION)
+ except RWServerAvailable:
+ retry.reset()
+ self.logger.warning('Found a RW server, dropping connection')
+ client._session_callback(KeeperState.CONNECTING)
+ except Exception:
+ self.logger.exception('Unhandled exception in connection loop')
+ raise
+ finally:
+ if self._socket is not None:
+ self._socket.close()
+
+ def _connect(self, host, port):
+ client = self.client
+ self.logger.info('Connecting to %s:%s', host, port)
+
+ self.logger.log(BLATHER,
+ ' Using session_id: %r session_passwd: %s',
+ client._session_id,
+ hexlify(client._session_passwd))
+
+ with self._socket_error_handling():
+ self._socket = self.handler.create_connection(
+ (host, port), client._session_timeout / 1000.0)
+
+ self._socket.setblocking(0)
+
+ connect = Connect(0, client.last_zxid, client._session_timeout,
+ client._session_id or 0, client._session_passwd,
+ client.read_only)
+
+ connect_result, zxid = self._invoke(client._session_timeout, connect)
+
+ if connect_result.time_out <= 0:
+ raise SessionExpiredError("Session has expired")
+
+ if zxid:
+ client.last_zxid = zxid
+
+ # Load return values
+ client._session_id = connect_result.session_id
+ client._protocol_version = connect_result.protocol_version
+ negotiated_session_timeout = connect_result.time_out
+ connect_timeout = negotiated_session_timeout / len(client.hosts)
+ read_timeout = negotiated_session_timeout * 2.0 / 3.0
+ client._session_passwd = connect_result.passwd
+
+ self.logger.log(BLATHER,
+ 'Session created, session_id: %r session_passwd: %s\n'
+ ' negotiated session timeout: %s\n'
+ ' connect timeout: %s\n'
+ ' read timeout: %s', client._session_id,
+ hexlify(client._session_passwd),
+ negotiated_session_timeout, connect_timeout,
+ read_timeout)
+
+ if connect_result.read_only:
+ client._session_callback(KeeperState.CONNECTED_RO)
+ self._ro_mode = iter(self._server_pinger())
+ else:
+ client._session_callback(KeeperState.CONNECTED)
+ self._ro_mode = None
+
+ for scheme, auth in client.auth_data:
+ ap = Auth(0, scheme, auth)
+ zxid = self._invoke(connect_timeout, ap, xid=AUTH_XID)
+ if zxid:
+ client.last_zxid = zxid
+ return read_timeout, connect_timeout
diff --git a/slider-agent/src/main/python/kazoo/protocol/paths.py b/slider-agent/src/main/python/kazoo/protocol/paths.py
new file mode 100644
index 0000000..e37c1a7
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/protocol/paths.py
@@ -0,0 +1,55 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+def normpath(path, trailing=False):
+ """Normalize path, eliminating double slashes, etc."""
+ comps = path.split('/')
+ new_comps = []
+ for comp in comps:
+ if comp == '':
+ continue
+ if comp in ('.', '..'):
+ raise ValueError('relative paths not allowed')
+ new_comps.append(comp)
+ new_path = '/'.join(new_comps)
+ if trailing is True and path.endswith('/'):
+ new_path += '/'
+ if path.startswith('/'):
+ return '/' + new_path
+ return new_path
+
+
+def join(a, *p):
+ """Join two or more pathname components, inserting '/' as needed.
+
+ If any component is an absolute path, all previous path components
+ will be discarded.
+
+ """
+ path = a
+ for b in p:
+ if b.startswith('/'):
+ path = b
+ elif path == '' or path.endswith('/'):
+ path += b
+ else:
+ path += '/' + b
+ return path
+
+
+def isabs(s):
+ """Test whether a path is absolute"""
+ return s.startswith('/')
+
+
+def basename(p):
+ """Returns the final component of a pathname"""
+ i = p.rfind('/') + 1
+ return p[i:]
+
+
+def _prefix_root(root, path, trailing=False):
+ """Prepend a root to a path. """
+ return normpath(join(_norm_root(root), path.lstrip('/')), trailing=trailing)
+
+
+def _norm_root(root):
+ return normpath(join('/', root))
diff --git a/slider-agent/src/main/python/kazoo/protocol/serialization.py b/slider-agent/src/main/python/kazoo/protocol/serialization.py
new file mode 100644
index 0000000..3b5df6c
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/protocol/serialization.py
@@ -0,0 +1,397 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+"""Zookeeper Serializers, Deserializers, and NamedTuple objects"""
+from collections import namedtuple
+import struct
+
+from kazoo.exceptions import EXCEPTIONS
+from kazoo.protocol.states import ZnodeStat
+from kazoo.security import ACL
+from kazoo.security import Id
+
+# Struct objects with formats compiled
+bool_struct = struct.Struct('B')
+int_struct = struct.Struct('!i')
+int_int_struct = struct.Struct('!ii')
+int_int_long_struct = struct.Struct('!iiq')
+
+int_long_int_long_struct = struct.Struct('!iqiq')
+multiheader_struct = struct.Struct('!iBi')
+reply_header_struct = struct.Struct('!iqi')
+stat_struct = struct.Struct('!qqqqiiiqiiq')
+
+try: # pragma: nocover
+ basestring
+except NameError:
+ basestring = str
+
+
+def read_string(buffer, offset):
+    """Reads a length-prefixed (int) string from the buffer and returns
+    the string and the new offset in the buffer"""
+ length = int_struct.unpack_from(buffer, offset)[0]
+ offset += int_struct.size
+ if length < 0:
+ return None, offset
+ else:
+ index = offset
+ offset += length
+ return buffer[index:index + length].decode('utf-8'), offset
+
+
+def read_acl(bytes, offset):
+ perms = int_struct.unpack_from(bytes, offset)[0]
+ offset += int_struct.size
+ scheme, offset = read_string(bytes, offset)
+ id, offset = read_string(bytes, offset)
+ return ACL(perms, Id(scheme, id)), offset
+
+
+def write_string(bytes):
+ if not bytes:
+ return int_struct.pack(-1)
+ else:
+ utf8_str = bytes.encode('utf-8')
+ return int_struct.pack(len(utf8_str)) + utf8_str
+
+
+def write_buffer(bytes):
+ if bytes is None:
+ return int_struct.pack(-1)
+ else:
+ return int_struct.pack(len(bytes)) + bytes
+
+
+def read_buffer(bytes, offset):
+ length = int_struct.unpack_from(bytes, offset)[0]
+ offset += int_struct.size
+ if length < 0:
+ return None, offset
+ else:
+ index = offset
+ offset += length
+ return bytes[index:index + length], offset
+
+
+class Close(namedtuple('Close', '')):
+ type = -11
+
+ @classmethod
+ def serialize(cls):
+ return b''
+
+CloseInstance = Close()
+
+
+class Ping(namedtuple('Ping', '')):
+ type = 11
+
+ @classmethod
+ def serialize(cls):
+ return b''
+
+PingInstance = Ping()
+
+
+class Connect(namedtuple('Connect', 'protocol_version last_zxid_seen'
+ ' time_out session_id passwd read_only')):
+ type = None
+
+ def serialize(self):
+ b = bytearray()
+ b.extend(int_long_int_long_struct.pack(
+ self.protocol_version, self.last_zxid_seen, self.time_out,
+ self.session_id))
+ b.extend(write_buffer(self.passwd))
+ b.extend([1 if self.read_only else 0])
+ return b
+
+ @classmethod
+ def deserialize(cls, bytes, offset):
+ proto_version, timeout, session_id = int_int_long_struct.unpack_from(
+ bytes, offset)
+ offset += int_int_long_struct.size
+ password, offset = read_buffer(bytes, offset)
+
+ try:
+ read_only = bool_struct.unpack_from(bytes, offset)[0] is 1
+ offset += bool_struct.size
+ except struct.error:
+ read_only = False
+ return cls(proto_version, 0, timeout, session_id, password,
+ read_only), offset
+
+
+class Create(namedtuple('Create', 'path data acl flags')):
+ type = 1
+
+ def serialize(self):
+ b = bytearray()
+ b.extend(write_string(self.path))
+ b.extend(write_buffer(self.data))
+ b.extend(int_struct.pack(len(self.acl)))
+ for acl in self.acl:
+ b.extend(int_struct.pack(acl.perms) +
+ write_string(acl.id.scheme) + write_string(acl.id.id))
+ b.extend(int_struct.pack(self.flags))
+ return b
+
+ @classmethod
+ def deserialize(cls, bytes, offset):
+ return read_string(bytes, offset)[0]
+
+
+class Delete(namedtuple('Delete', 'path version')):
+ type = 2
+
+ def serialize(self):
+ b = bytearray()
+ b.extend(write_string(self.path))
+ b.extend(int_struct.pack(self.version))
+ return b
+
+ @classmethod
+ def deserialize(self, bytes, offset):
+ return True
+
+
+class Exists(namedtuple('Exists', 'path watcher')):
+ type = 3
+
+ def serialize(self):
+ b = bytearray()
+ b.extend(write_string(self.path))
+ b.extend([1 if self.watcher else 0])
+ return b
+
+ @classmethod
+ def deserialize(cls, bytes, offset):
+ stat = ZnodeStat._make(stat_struct.unpack_from(bytes, offset))
+ return stat if stat.czxid != -1 else None
+
+
+class GetData(namedtuple('GetData', 'path watcher')):
+ type = 4
+
+ def serialize(self):
+ b = bytearray()
+ b.extend(write_string(self.path))
+ b.extend([1 if self.watcher else 0])
+ return b
+
+ @classmethod
+ def deserialize(cls, bytes, offset):
+ data, offset = read_buffer(bytes, offset)
+ stat = ZnodeStat._make(stat_struct.unpack_from(bytes, offset))
+ return data, stat
+
+
+class SetData(namedtuple('SetData', 'path data version')):
+ type = 5
+
+ def serialize(self):
+ b = bytearray()
+ b.extend(write_string(self.path))
+ b.extend(write_buffer(self.data))
+ b.extend(int_struct.pack(self.version))
+ return b
+
+ @classmethod
+ def deserialize(cls, bytes, offset):
+ return ZnodeStat._make(stat_struct.unpack_from(bytes, offset))
+
+
+class GetACL(namedtuple('GetACL', 'path')):
+ type = 6
+
+ def serialize(self):
+ return bytearray(write_string(self.path))
+
+ @classmethod
+ def deserialize(cls, bytes, offset):
+ count = int_struct.unpack_from(bytes, offset)[0]
+ offset += int_struct.size
+ if count == -1: # pragma: nocover
+ return []
+
+ acls = []
+ for c in range(count):
+ acl, offset = read_acl(bytes, offset)
+ acls.append(acl)
+ stat = ZnodeStat._make(stat_struct.unpack_from(bytes, offset))
+ return acls, stat
+
+
+class SetACL(namedtuple('SetACL', 'path acls version')):
+ type = 7
+
+ def serialize(self):
+ b = bytearray()
+ b.extend(write_string(self.path))
+ b.extend(int_struct.pack(len(self.acls)))
+ for acl in self.acls:
+ b.extend(int_struct.pack(acl.perms) +
+ write_string(acl.id.scheme) + write_string(acl.id.id))
+ b.extend(int_struct.pack(self.version))
+ return b
+
+ @classmethod
+ def deserialize(cls, bytes, offset):
+ return ZnodeStat._make(stat_struct.unpack_from(bytes, offset))
+
+
+class GetChildren(namedtuple('GetChildren', 'path watcher')):
+ type = 8
+
+ def serialize(self):
+ b = bytearray()
+ b.extend(write_string(self.path))
+ b.extend([1 if self.watcher else 0])
+ return b
+
+ @classmethod
+ def deserialize(cls, bytes, offset):
+ count = int_struct.unpack_from(bytes, offset)[0]
+ offset += int_struct.size
+ if count == -1: # pragma: nocover
+ return []
+
+ children = []
+ for c in range(count):
+ child, offset = read_string(bytes, offset)
+ children.append(child)
+ return children
+
+
+class Sync(namedtuple('Sync', 'path')):
+ type = 9
+
+ def serialize(self):
+ return write_string(self.path)
+
+ @classmethod
+ def deserialize(cls, buffer, offset):
+ return read_string(buffer, offset)[0]
+
+
+class GetChildren2(namedtuple('GetChildren2', 'path watcher')):
+ type = 12
+
+ def serialize(self):
+ b = bytearray()
+ b.extend(write_string(self.path))
+ b.extend([1 if self.watcher else 0])
+ return b
+
+ @classmethod
+ def deserialize(cls, bytes, offset):
+ count = int_struct.unpack_from(bytes, offset)[0]
+ offset += int_struct.size
+ if count == -1: # pragma: nocover
+ return []
+
+ children = []
+ for c in range(count):
+ child, offset = read_string(bytes, offset)
+ children.append(child)
+ stat = ZnodeStat._make(stat_struct.unpack_from(bytes, offset))
+ return children, stat
+
+
+class CheckVersion(namedtuple('CheckVersion', 'path version')):
+ type = 13
+
+ def serialize(self):
+ b = bytearray()
+ b.extend(write_string(self.path))
+ b.extend(int_struct.pack(self.version))
+ return b
+
+
+class Transaction(namedtuple('Transaction', 'operations')):
+ type = 14
+
+ def serialize(self):
+ b = bytearray()
+ for op in self.operations:
+ b.extend(MultiHeader(op.type, False, -1).serialize() +
+ op.serialize())
+ return b + multiheader_struct.pack(-1, True, -1)
+
+ @classmethod
+ def deserialize(cls, bytes, offset):
+ header = MultiHeader(None, False, None)
+ results = []
+ response = None
+ while not header.done:
+ if header.type == Create.type:
+ response, offset = read_string(bytes, offset)
+ elif header.type == Delete.type:
+ response = True
+ elif header.type == SetData.type:
+ response = ZnodeStat._make(
+ stat_struct.unpack_from(bytes, offset))
+ offset += stat_struct.size
+ elif header.type == CheckVersion.type:
+ response = True
+ elif header.type == -1:
+ err = int_struct.unpack_from(bytes, offset)[0]
+ offset += int_struct.size
+ response = EXCEPTIONS[err]()
+ if response:
+ results.append(response)
+ header, offset = MultiHeader.deserialize(bytes, offset)
+ return results
+
+ @staticmethod
+ def unchroot(client, response):
+ resp = []
+ for result in response:
+ if isinstance(result, basestring):
+ resp.append(client.unchroot(result))
+ else:
+ resp.append(result)
+ return resp
+
+
+class Auth(namedtuple('Auth', 'auth_type scheme auth')):
+ type = 100
+
+ def serialize(self):
+ return (int_struct.pack(self.auth_type) + write_string(self.scheme) +
+ write_string(self.auth))
+
+
+class Watch(namedtuple('Watch', 'type state path')):
+ @classmethod
+ def deserialize(cls, bytes, offset):
+ """Given bytes and the current bytes offset, return the
+ type, state, path, and new offset"""
+ type, state = int_int_struct.unpack_from(bytes, offset)
+ offset += int_int_struct.size
+ path, offset = read_string(bytes, offset)
+ return cls(type, state, path), offset
+
+
+class ReplyHeader(namedtuple('ReplyHeader', 'xid, zxid, err')):
+ @classmethod
+ def deserialize(cls, bytes, offset):
+ """Given bytes and the current bytes offset, return a
+ :class:`ReplyHeader` instance and the new offset"""
+ new_offset = offset + reply_header_struct.size
+ return cls._make(
+ reply_header_struct.unpack_from(bytes, offset)), new_offset
+
+
+class MultiHeader(namedtuple('MultiHeader', 'type done err')):
+ def serialize(self):
+ b = bytearray()
+ b.extend(int_struct.pack(self.type))
+ b.extend([1 if self.done else 0])
+ b.extend(int_struct.pack(self.err))
+ return b
+
+ @classmethod
+ def deserialize(cls, bytes, offset):
+ t, done, err = multiheader_struct.unpack_from(bytes, offset)
+ offset += multiheader_struct.size
+ return cls(t, done is 1, err), offset
diff --git a/slider-agent/src/main/python/kazoo/protocol/states.py b/slider-agent/src/main/python/kazoo/protocol/states.py
new file mode 100644
index 0000000..d09c2c6
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/protocol/states.py
@@ -0,0 +1,238 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+"""Kazoo State and Event objects"""
+from collections import namedtuple
+
+
+class KazooState(object):
+ """High level connection state values
+
+ States inspired by Netflix Curator.
+
+ .. attribute:: SUSPENDED
+
+ The connection has been lost but may be recovered. We should
+ operate in a "safe mode" until then. When the connection is
+ resumed, it may be discovered that the session expired. A
+ client should not assume that locks are valid during this
+ time.
+
+ .. attribute:: CONNECTED
+
+ The connection is alive and well.
+
+ .. attribute:: LOST
+
+ The connection has been confirmed dead. Any ephemeral nodes
+ will need to be recreated upon re-establishing a connection.
+ If locks were acquired or recipes using ephemeral nodes are in
+ use, they can be considered lost as well.
+
+ """
+ SUSPENDED = "SUSPENDED"
+ CONNECTED = "CONNECTED"
+ LOST = "LOST"
+
+
+class KeeperState(object):
+ """Zookeeper State
+
+ Represents the Zookeeper state. Watch functions will receive a
+ :class:`KeeperState` attribute as their state argument.
+
+ .. attribute:: AUTH_FAILED
+
+ Authentication has failed, this is an unrecoverable error.
+
+ .. attribute:: CONNECTED
+
+ Zookeeper is connected.
+
+ .. attribute:: CONNECTED_RO
+
+ Zookeeper is connected in read-only state.
+
+ .. attribute:: CONNECTING
+
+ Zookeeper is currently attempting to establish a connection.
+
+ .. attribute:: EXPIRED_SESSION
+
+ The prior session was invalid, all prior ephemeral nodes are
+ gone.
+
+ """
+ AUTH_FAILED = 'AUTH_FAILED'
+ CONNECTED = 'CONNECTED'
+ CONNECTED_RO = 'CONNECTED_RO'
+ CONNECTING = 'CONNECTING'
+ CLOSED = 'CLOSED'
+ EXPIRED_SESSION = 'EXPIRED_SESSION'
+
+
+class EventType(object):
+ """Zookeeper Event
+
+ Represents a Zookeeper event. Events trigger watch functions which
+ will receive a :class:`EventType` attribute as their event
+ argument.
+
+ .. attribute:: CREATED
+
+ A node has been created.
+
+ .. attribute:: DELETED
+
+ A node has been deleted.
+
+ .. attribute:: CHANGED
+
+ The data for a node has changed.
+
+ .. attribute:: CHILD
+
+ The children under a node have changed (a child was added or
+ removed). This event does not indicate the data for a child
+ node has changed, which must have its own watch established.
+
+ """
+ CREATED = 'CREATED'
+ DELETED = 'DELETED'
+ CHANGED = 'CHANGED'
+ CHILD = 'CHILD'
+
+EVENT_TYPE_MAP = {
+ 1: EventType.CREATED,
+ 2: EventType.DELETED,
+ 3: EventType.CHANGED,
+ 4: EventType.CHILD
+}
+
+
+class WatchedEvent(namedtuple('WatchedEvent', ('type', 'state', 'path'))):
+ """A change on ZooKeeper that a Watcher is able to respond to.
+
+ The :class:`WatchedEvent` includes exactly what happened, the
+ current state of ZooKeeper, and the path of the node that was
+ involved in the event. An instance of :class:`WatchedEvent` will be
+ passed to registered watch functions.
+
+ .. attribute:: type
+
+ A :class:`EventType` attribute indicating the event type.
+
+ .. attribute:: state
+
+ A :class:`KeeperState` attribute indicating the Zookeeper
+ state.
+
+ .. attribute:: path
+
+ The path of the node for the watch event.
+
+ """
+
+
+class Callback(namedtuple('Callback', ('type', 'func', 'args'))):
+ """A callback that is handed to a handler for dispatch
+
+ :param type: Type of the callback, currently is only 'watch'
+ :param func: Callback function
+ :param args: Argument list for the callback function
+
+ """
+
+
+class ZnodeStat(namedtuple('ZnodeStat', 'czxid mzxid ctime mtime version'
+ ' cversion aversion ephemeralOwner dataLength'
+ ' numChildren pzxid')):
+ """A ZnodeStat structure with convenience properties
+
+ When getting the value of a node from Zookeeper, the properties for
+ the node known as a "Stat structure" will be retrieved. The
+ :class:`ZnodeStat` object provides access to the standard Stat
+ properties and additional properties that are more readable and use
+ Python time semantics (seconds since epoch instead of ms).
+
+ .. note::
+
+ The original Zookeeper Stat name is in parens next to the name
+ when it differs from the convenience attribute. These are **not
+ functions**, just attributes.
+
+ .. attribute:: creation_transaction_id (czxid)
+
+ The transaction id of the change that caused this znode to be
+ created.
+
+ .. attribute:: last_modified_transaction_id (mzxid)
+
+ The transaction id of the change that last modified this znode.
+
+ .. attribute:: created (ctime)
+
+ The time in seconds from epoch when this node was created.
+ (ctime is in milliseconds)
+
+ .. attribute:: last_modified (mtime)
+
+ The time in seconds from epoch when this znode was last
+ modified. (mtime is in milliseconds)
+
+ .. attribute:: version
+
+ The number of changes to the data of this znode.
+
+ .. attribute:: acl_version (aversion)
+
+ The number of changes to the ACL of this znode.
+
+ .. attribute:: owner_session_id (ephemeralOwner)
+
+ The session id of the owner of this znode if the znode is an
+ ephemeral node. If it is not an ephemeral node, it will be
+ `None`. (ephemeralOwner will be 0 if it is not ephemeral)
+
+ .. attribute:: data_length (dataLength)
+
+ The length of the data field of this znode.
+
+ .. attribute:: children_count (numChildren)
+
+ The number of children of this znode.
+
+ """
+ @property
+ def acl_version(self):
+ return self.aversion
+
+ @property
+ def children_version(self):
+ return self.cversion
+
+ @property
+ def created(self):
+ return self.ctime / 1000.0
+
+ @property
+ def last_modified(self):
+ return self.mtime / 1000.0
+
+ @property
+ def owner_session_id(self):
+ return self.ephemeralOwner or None
+
+ @property
+ def creation_transaction_id(self):
+ return self.czxid
+
+ @property
+ def last_modified_transaction_id(self):
+ return self.mzxid
+
+ @property
+ def data_length(self):
+ return self.dataLength
+
+ @property
+ def children_count(self):
+ return self.numChildren
diff --git a/slider-agent/src/main/python/kazoo/recipe/__init__.py b/slider-agent/src/main/python/kazoo/recipe/__init__.py
new file mode 100644
index 0000000..a7bacf3
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/recipe/__init__.py
@@ -0,0 +1,2 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+#
diff --git a/slider-agent/src/main/python/kazoo/recipe/barrier.py b/slider-agent/src/main/python/kazoo/recipe/barrier.py
new file mode 100644
index 0000000..729ac07
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/recipe/barrier.py
@@ -0,0 +1,215 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+"""Zookeeper Barriers
+
+:Maintainer: None
+:Status: Unknown
+
+"""
+import os
+import socket
+import uuid
+
+from kazoo.protocol.states import EventType
+from kazoo.exceptions import KazooException
+from kazoo.exceptions import NoNodeError
+from kazoo.exceptions import NodeExistsError
+
+
+class Barrier(object):
+ """Kazoo Barrier
+
+ Implements a barrier to block processing of a set of nodes until
+ a condition is met at which point the nodes will be allowed to
+ proceed. The barrier is in place if its node exists.
+
+ .. warning::
+
+ The :meth:`wait` function does not handle connection loss and
+ may raise :exc:`~kazoo.exceptions.ConnectionLossException` if
+ the connection is lost while waiting.
+
+ """
+ def __init__(self, client, path):
+ """Create a Kazoo Barrier
+
+ :param client: A :class:`~kazoo.client.KazooClient` instance.
+ :param path: The barrier path to use.
+
+ """
+ self.client = client
+ self.path = path
+
+ def create(self):
+ """Establish the barrier if it doesn't exist already"""
+ self.client.retry(self.client.ensure_path, self.path)
+
+ def remove(self):
+ """Remove the barrier
+
+ :returns: Whether the barrier actually needed to be removed.
+ :rtype: bool
+
+ """
+ try:
+ self.client.retry(self.client.delete, self.path)
+ return True
+ except NoNodeError:
+ return False
+
+ def wait(self, timeout=None):
+ """Wait on the barrier to be cleared
+
+ :returns: True if the barrier has been cleared, otherwise
+ False.
+ :rtype: bool
+
+ """
+ cleared = self.client.handler.event_object()
+
+ def wait_for_clear(event):
+ if event.type == EventType.DELETED:
+ cleared.set()
+
+ exists = self.client.exists(self.path, watch=wait_for_clear)
+ if not exists:
+ return True
+
+ cleared.wait(timeout)
+ return cleared.is_set()
+
+
+class DoubleBarrier(object):
+ """Kazoo Double Barrier
+
+ Double barriers are used to synchronize the beginning and end of
+ a distributed task. The barrier blocks when entering it until all
+ the members have joined, and blocks when leaving until all the
+ members have left.
+
+ .. note::
+
+ You should register a listener for session loss as the process
+ will no longer be part of the barrier once the session is
+ gone. Connection losses will be retried with the default retry
+ policy.
+
+ """
+ def __init__(self, client, path, num_clients, identifier=None):
+ """Create a Double Barrier
+
+ :param client: A :class:`~kazoo.client.KazooClient` instance.
+ :param path: The barrier path to use.
+ :param num_clients: How many clients must enter the barrier to
+ proceed.
+ :type num_clients: int
+ :param identifier: An identifier to use for this member of the
+ barrier when participating. Defaults to the
+ hostname + process id.
+
+ """
+ self.client = client
+ self.path = path
+ self.num_clients = num_clients
+ self._identifier = identifier or '%s-%s' % (
+ socket.getfqdn(), os.getpid())
+ self.participating = False
+ self.assured_path = False
+ self.node_name = uuid.uuid4().hex
+ self.create_path = self.path + "/" + self.node_name
+
+ def enter(self):
+ """Enter the barrier, blocks until all nodes have entered"""
+ try:
+ self.client.retry(self._inner_enter)
+ self.participating = True
+ except KazooException:
+ # We failed to enter, best effort cleanup
+ self._best_effort_cleanup()
+ self.participating = False
+
+ def _inner_enter(self):
+ # make sure our barrier parent node exists
+ if not self.assured_path:
+ self.client.ensure_path(self.path)
+ self.assured_path = True
+
+ ready = self.client.handler.event_object()
+
+ try:
+ self.client.create(self.create_path,
+ self._identifier.encode('utf-8'), ephemeral=True)
+ except NodeExistsError:
+ pass
+
+ def created(event):
+ if event.type == EventType.CREATED:
+ ready.set()
+
+ self.client.exists(self.path + '/' + 'ready', watch=created)
+
+ children = self.client.get_children(self.path)
+
+ if len(children) < self.num_clients:
+ ready.wait()
+ else:
+ self.client.ensure_path(self.path + '/ready')
+ return True
+
+ def leave(self):
+ """Leave the barrier, blocks until all nodes have left"""
+ try:
+ self.client.retry(self._inner_leave)
+ except KazooException: # pragma: nocover
+ # Failed to cleanly leave
+ self._best_effort_cleanup()
+ self.participating = False
+
+ def _inner_leave(self):
+        # Delete the ready node if it's around
+ try:
+ self.client.delete(self.path + '/ready')
+ except NoNodeError:
+ pass
+
+ while True:
+ children = self.client.get_children(self.path)
+ if not children:
+ return True
+
+ if len(children) == 1 and children[0] == self.node_name:
+ self.client.delete(self.create_path)
+ return True
+
+ children.sort()
+
+ ready = self.client.handler.event_object()
+
+ def deleted(event):
+ if event.type == EventType.DELETED:
+ ready.set()
+
+ if self.node_name == children[0]:
+ # We're first, wait on the highest to leave
+ if not self.client.exists(self.path + '/' + children[-1],
+ watch=deleted):
+ continue
+
+ ready.wait()
+ continue
+
+ # Delete our node
+ self.client.delete(self.create_path)
+
+ # Wait on the first
+ if not self.client.exists(self.path + '/' + children[0],
+ watch=deleted):
+ continue
+
+ # Wait for the lowest to be deleted
+ ready.wait()
+
+ def _best_effort_cleanup(self):
+ try:
+ self.client.retry(self.client.delete, self.create_path)
+ except NoNodeError:
+ pass
diff --git a/slider-agent/src/main/python/kazoo/recipe/counter.py b/slider-agent/src/main/python/kazoo/recipe/counter.py
new file mode 100644
index 0000000..026d724
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/recipe/counter.py
@@ -0,0 +1,95 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+"""Zookeeper Counter
+
+:Maintainer: None
+:Status: Unknown
+
+"""
+
+from kazoo.exceptions import BadVersionError
+from kazoo.retry import ForceRetryError
+
+
+class Counter(object):
+ """Kazoo Counter
+
+ A shared counter of either int or float values. Changes to the
+ counter are done atomically. The general retry policy is used to
+ retry operations if concurrent changes are detected.
+
+ The data is marshaled using `repr(value)` and converted back using
+ `type(counter.default)(value)` both using an ascii encoding. As
+ such other data types might be used for the counter value.
+
+ Counter changes can raise
+ :class:`~kazoo.exceptions.BadVersionError` if the retry policy
+ wasn't able to apply a change.
+
+ Example usage:
+
+ .. code-block:: python
+
+ zk = KazooClient()
+ counter = zk.Counter("/int")
+ counter += 2
+ counter -= 1
+ counter.value == 1
+
+ counter = zk.Counter("/float", default=1.0)
+ counter += 2.0
+ counter.value == 3.0
+
+ """
+ def __init__(self, client, path, default=0):
+ """Create a Kazoo Counter
+
+ :param client: A :class:`~kazoo.client.KazooClient` instance.
+ :param path: The counter path to use.
+ :param default: The default value.
+
+ """
+ self.client = client
+ self.path = path
+ self.default = default
+ self.default_type = type(default)
+ self._ensured_path = False
+
+ def _ensure_node(self):
+ if not self._ensured_path:
+ # make sure our node exists
+ self.client.ensure_path(self.path)
+ self._ensured_path = True
+
+ def _value(self):
+ self._ensure_node()
+ old, stat = self.client.get(self.path)
+ old = old.decode('ascii') if old != b'' else self.default
+ version = stat.version
+ data = self.default_type(old)
+ return data, version
+
+ @property
+ def value(self):
+ return self._value()[0]
+
+ def _change(self, value):
+ if not isinstance(value, self.default_type):
+ raise TypeError('invalid type for value change')
+ self.client.retry(self._inner_change, value)
+ return self
+
+ def _inner_change(self, value):
+ data, version = self._value()
+ data = repr(data + value).encode('ascii')
+ try:
+ self.client.set(self.path, data, version=version)
+ except BadVersionError: # pragma: nocover
+ raise ForceRetryError()
+
+ def __add__(self, value):
+ """Add value to counter."""
+ return self._change(value)
+
+ def __sub__(self, value):
+ """Subtract value from counter."""
+ return self._change(-value)
diff --git a/slider-agent/src/main/python/kazoo/recipe/election.py b/slider-agent/src/main/python/kazoo/recipe/election.py
new file mode 100644
index 0000000..5349e26
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/recipe/election.py
@@ -0,0 +1,80 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+"""ZooKeeper Leader Elections
+
+:Maintainer: None
+:Status: Unknown
+
+"""
+from kazoo.exceptions import CancelledError
+
+
+class Election(object):
+ """Kazoo Basic Leader Election
+
+ Example usage with a :class:`~kazoo.client.KazooClient` instance::
+
+ zk = KazooClient()
+ election = zk.Election("/electionpath", "my-identifier")
+
+ # blocks until the election is won, then calls
+ # my_leader_function()
+ election.run(my_leader_function)
+
+ """
+ def __init__(self, client, path, identifier=None):
+ """Create a Kazoo Leader Election
+
+ :param client: A :class:`~kazoo.client.KazooClient` instance.
+ :param path: The election path to use.
+ :param identifier: Name to use for this lock contender. This
+ can be useful for querying to see who the
+ current lock contenders are.
+
+ """
+ self.lock = client.Lock(path, identifier)
+
+ def run(self, func, *args, **kwargs):
+ """Contend for the leadership
+
+ This call will block until either this contender is cancelled
+ or this contender wins the election and the provided leadership
+ function subsequently returns or fails.
+
+ :param func: A function to be called if/when the election is
+ won.
+ :param args: Arguments to leadership function.
+ :param kwargs: Keyword arguments to leadership function.
+
+ """
+ if not callable(func):
+ raise ValueError("leader function is not callable")
+
+ try:
+ with self.lock:
+ func(*args, **kwargs)
+
+ except CancelledError:
+ pass
+
+ def cancel(self):
+ """Cancel participation in the election
+
+ .. note::
+
+ If this contender has already been elected leader, this
+ method will not interrupt the leadership function.
+
+ """
+ self.lock.cancel()
+
+ def contenders(self):
+ """Return an ordered list of the current contenders in the
+ election
+
+ .. note::
+
+ If the contenders did not set an identifier, it will appear
+ as a blank string.
+
+ """
+ return self.lock.contenders()
diff --git a/slider-agent/src/main/python/kazoo/recipe/lock.py b/slider-agent/src/main/python/kazoo/recipe/lock.py
new file mode 100644
index 0000000..8f14847
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/recipe/lock.py
@@ -0,0 +1,521 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+"""Zookeeper Locking Implementations
+
+:Maintainer: None
+:Status: Unknown
+
+Error Handling
+==============
+
+It's highly recommended to add a state listener with
+:meth:`~KazooClient.add_listener` and watch for
+:attr:`~KazooState.LOST` and :attr:`~KazooState.SUSPENDED` state
+changes and re-act appropriately. In the event that a
+:attr:`~KazooState.LOST` state occurs, it's certain that the lock
+and/or the lease has been lost.
+
+"""
+import uuid
+
+from kazoo.retry import (
+ KazooRetry,
+ RetryFailedError,
+ ForceRetryError
+)
+from kazoo.exceptions import CancelledError
+from kazoo.exceptions import KazooException
+from kazoo.exceptions import LockTimeout
+from kazoo.exceptions import NoNodeError
+from kazoo.protocol.states import KazooState
+
+
+class Lock(object):
+ """Kazoo Lock
+
+ Example usage with a :class:`~kazoo.client.KazooClient` instance:
+
+ .. code-block:: python
+
+ zk = KazooClient()
+ lock = zk.Lock("/lockpath", "my-identifier")
+ with lock: # blocks waiting for lock acquisition
+ # do something with the lock
+
+ Note: This lock is re-entrant. Repeat calls after acquired will
+    continue to return ``True``.
+
+ """
+ _NODE_NAME = '__lock__'
+
+ def __init__(self, client, path, identifier=None):
+ """Create a Kazoo lock.
+
+ :param client: A :class:`~kazoo.client.KazooClient` instance.
+ :param path: The lock path to use.
+ :param identifier: Name to use for this lock contender. This
+ can be useful for querying to see who the
+ current lock contenders are.
+
+ """
+ self.client = client
+ self.path = path
+
+ # some data is written to the node. this can be queried via
+ # contenders() to see who is contending for the lock
+ self.data = str(identifier or "").encode('utf-8')
+
+ self.wake_event = client.handler.event_object()
+
+ # props to Netflix Curator for this trick. It is possible for our
+ # create request to succeed on the server, but for a failure to
+ # prevent us from getting back the full path name. We prefix our
+ # lock name with a uuid and can check for its presence on retry.
+ self.prefix = uuid.uuid4().hex + self._NODE_NAME
+ self.create_path = self.path + "/" + self.prefix
+
+ self.create_tried = False
+ self.is_acquired = False
+ self.assured_path = False
+ self.cancelled = False
+ self._retry = KazooRetry(max_tries=None,
+ sleep_func=client.handler.sleep_func)
+
+ def _ensure_path(self):
+ self.client.ensure_path(self.path)
+ self.assured_path = True
+
+ def cancel(self):
+ """Cancel a pending lock acquire."""
+ self.cancelled = True
+ self.wake_event.set()
+
+ def acquire(self, blocking=True, timeout=None):
+ """
+        Acquire the lock. By default, blocks and waits forever.
+
+ :param blocking: Block until lock is obtained or return immediately.
+ :type blocking: bool
+ :param timeout: Don't wait forever to acquire the lock.
+ :type timeout: float or None
+
+ :returns: Was the lock acquired?
+ :rtype: bool
+
+ :raises: :exc:`~kazoo.exceptions.LockTimeout` if the lock
+ wasn't acquired within `timeout` seconds.
+
+ .. versionadded:: 1.1
+ The timeout option.
+ """
+ try:
+ retry = self._retry.copy()
+ retry.deadline = timeout
+ self.is_acquired = retry(self._inner_acquire,
+ blocking=blocking, timeout=timeout)
+ except RetryFailedError:
+ self._best_effort_cleanup()
+ except KazooException:
+ # if we did ultimately fail, attempt to clean up
+ self._best_effort_cleanup()
+ self.cancelled = False
+ raise
+
+ if not self.is_acquired:
+ self._delete_node(self.node)
+
+ return self.is_acquired
+
+ def _watch_session(self, state):
+ self.wake_event.set()
+ return True
+
+ def _inner_acquire(self, blocking, timeout):
+ # make sure our election parent node exists
+ if not self.assured_path:
+ self._ensure_path()
+
+ node = None
+ if self.create_tried:
+ node = self._find_node()
+ else:
+ self.create_tried = True
+
+ if not node:
+ node = self.client.create(self.create_path, self.data,
+ ephemeral=True, sequence=True)
+ # strip off path to node
+ node = node[len(self.path) + 1:]
+
+ self.node = node
+
+ while True:
+ self.wake_event.clear()
+
+ # bail out with an exception if cancellation has been requested
+ if self.cancelled:
+ raise CancelledError()
+
+ children = self._get_sorted_children()
+
+ try:
+ our_index = children.index(node)
+ except ValueError: # pragma: nocover
+ # somehow we aren't in the children -- probably we are
+ # recovering from a session failure and our ephemeral
+ # node was removed
+ raise ForceRetryError()
+
+ if self.acquired_lock(children, our_index):
+ return True
+
+ if not blocking:
+ return False
+
+ # otherwise we are in the mix. watch predecessor and bide our time
+ predecessor = self.path + "/" + children[our_index - 1]
+ self.client.add_listener(self._watch_session)
+ try:
+ if self.client.exists(predecessor, self._watch_predecessor):
+ self.wake_event.wait(timeout)
+ if not self.wake_event.isSet():
+ raise LockTimeout("Failed to acquire lock on %s after %s "
+ "seconds" % (self.path, timeout))
+ finally:
+ self.client.remove_listener(self._watch_session)
+
+ def acquired_lock(self, children, index):
+ return index == 0
+
+ def _watch_predecessor(self, event):
+ self.wake_event.set()
+
+ def _get_sorted_children(self):
+ children = self.client.get_children(self.path)
+
+ # can't just sort directly: the node names are prefixed by uuids
+ lockname = self._NODE_NAME
+ children.sort(key=lambda c: c[c.find(lockname) + len(lockname):])
+ return children
+
+ def _find_node(self):
+ children = self.client.get_children(self.path)
+ for child in children:
+ if child.startswith(self.prefix):
+ return child
+ return None
+
+ def _delete_node(self, node):
+ self.client.delete(self.path + "/" + node)
+
+ def _best_effort_cleanup(self):
+ try:
+ node = self._find_node()
+ if node:
+ self._delete_node(node)
+ except KazooException: # pragma: nocover
+ pass
+
+ def release(self):
+ """Release the lock immediately."""
+ return self.client.retry(self._inner_release)
+
+ def _inner_release(self):
+ if not self.is_acquired:
+ return False
+
+ try:
+ self._delete_node(self.node)
+ except NoNodeError: # pragma: nocover
+ pass
+
+ self.is_acquired = False
+ self.node = None
+
+ return True
+
+ def contenders(self):
+ """Return an ordered list of the current contenders for the
+ lock.
+
+ .. note::
+
+ If the contenders did not set an identifier, it will appear
+ as a blank string.
+
+ """
+ # make sure our election parent node exists
+ if not self.assured_path:
+ self._ensure_path()
+
+ children = self._get_sorted_children()
+
+ contenders = []
+ for child in children:
+ try:
+ data, stat = self.client.get(self.path + "/" + child)
+ contenders.append(data.decode('utf-8'))
+ except NoNodeError: # pragma: nocover
+ pass
+ return contenders
+
+ def __enter__(self):
+ self.acquire()
+
+ def __exit__(self, exc_type, exc_value, traceback):
+ self.release()
+
+
+class Semaphore(object):
+ """A Zookeeper-based Semaphore
+
+ This synchronization primitive operates in the same manner as the
+ Python threading version only uses the concept of leases to
+    indicate how many leases are available for the lock
+ rather than counting.
+
+ Example:
+
+ .. code-block:: python
+
+ zk = KazooClient()
+ semaphore = zk.Semaphore("/leasepath", "my-identifier")
+ with semaphore: # blocks waiting for lock acquisition
+ # do something with the semaphore
+
+ .. warning::
+
+ This class stores the allowed max_leases as the data on the
+ top-level semaphore node. The stored value is checked once
+ against the max_leases of each instance. This check is
+ performed when acquire is called the first time. The semaphore
+ node needs to be deleted to change the allowed leases.
+
+ .. versionadded:: 0.6
+ The Semaphore class.
+
+ .. versionadded:: 1.1
+ The max_leases check.
+
+ """
+ def __init__(self, client, path, identifier=None, max_leases=1):
+ """Create a Kazoo Lock
+
+ :param client: A :class:`~kazoo.client.KazooClient` instance.
+ :param path: The semaphore path to use.
+ :param identifier: Name to use for this lock contender. This
+ can be useful for querying to see who the
+ current lock contenders are.
+ :param max_leases: The maximum amount of leases available for
+ the semaphore.
+
+ """
+ # Implementation notes about how excessive thundering herd
+ # and watches are avoided
+ # - A node (lease pool) holds children for each lease in use
+ # - A lock is acquired for a process attempting to acquire a
+ # lease. If a lease is available, the ephemeral node is
+ # created in the lease pool and the lock is released.
+ # - Only the lock holder watches for children changes in the
+ # lease pool
+ self.client = client
+ self.path = path
+
+ # some data is written to the node. this can be queried via
+ # contenders() to see who is contending for the lock
+ self.data = str(identifier or "").encode('utf-8')
+ self.max_leases = max_leases
+ self.wake_event = client.handler.event_object()
+
+ self.create_path = self.path + "/" + uuid.uuid4().hex
+ self.lock_path = path + '-' + '__lock__'
+ self.is_acquired = False
+ self.assured_path = False
+ self.cancelled = False
+ self._session_expired = False
+
+ def _ensure_path(self):
+ result = self.client.ensure_path(self.path)
+ self.assured_path = True
+ if result is True:
+ # node did already exist
+ data, _ = self.client.get(self.path)
+ try:
+ leases = int(data.decode('utf-8'))
+ except (ValueError, TypeError):
+ # ignore non-numeric data, maybe the node data is used
+ # for other purposes
+ pass
+ else:
+ if leases != self.max_leases:
+ raise ValueError(
+ "Inconsistent max leases: %s, expected: %s" %
+ (leases, self.max_leases)
+ )
+ else:
+ self.client.set(self.path, str(self.max_leases).encode('utf-8'))
+
+ def cancel(self):
+ """Cancel a pending semaphore acquire."""
+ self.cancelled = True
+ self.wake_event.set()
+
+ def acquire(self, blocking=True, timeout=None):
+        """Acquire the semaphore. By default, blocks and waits forever.
+
+ :param blocking: Block until semaphore is obtained or
+ return immediately.
+ :type blocking: bool
+ :param timeout: Don't wait forever to acquire the semaphore.
+ :type timeout: float or None
+
+ :returns: Was the semaphore acquired?
+ :rtype: bool
+
+ :raises:
+ ValueError if the max_leases value doesn't match the
+ stored value.
+
+ :exc:`~kazoo.exceptions.LockTimeout` if the semaphore
+ wasn't acquired within `timeout` seconds.
+
+ .. versionadded:: 1.1
+ The blocking, timeout arguments and the max_leases check.
+ """
+ # If the semaphore had previously been canceled, make sure to
+ # reset that state.
+ self.cancelled = False
+
+ try:
+ self.is_acquired = self.client.retry(
+ self._inner_acquire, blocking=blocking, timeout=timeout)
+ except KazooException:
+ # if we did ultimately fail, attempt to clean up
+ self._best_effort_cleanup()
+ self.cancelled = False
+ raise
+
+ return self.is_acquired
+
+ def _inner_acquire(self, blocking, timeout=None):
+ """Inner loop that runs from the top anytime a command hits a
+ retryable Zookeeper exception."""
+ self._session_expired = False
+ self.client.add_listener(self._watch_session)
+
+ if not self.assured_path:
+ self._ensure_path()
+
+ # Do we already have a lease?
+ if self.client.exists(self.create_path):
+ return True
+
+ with self.client.Lock(self.lock_path, self.data):
+ while True:
+ self.wake_event.clear()
+
+ # Attempt to grab our lease...
+ if self._get_lease():
+ return True
+
+ if blocking:
+ # If blocking, wait until self._watch_lease_change() is
+ # called before returning
+ self.wake_event.wait(timeout)
+ if not self.wake_event.isSet():
+ raise LockTimeout(
+ "Failed to acquire semaphore on %s "
+ "after %s seconds" % (self.path, timeout))
+ else:
+ # If not blocking, register another watch that will trigger
+ # self._get_lease() as soon as the children change again.
+ self.client.get_children(self.path, self._get_lease)
+ return False
+
+ def _watch_lease_change(self, event):
+ self.wake_event.set()
+
+ def _get_lease(self, data=None):
+ # Make sure the session is still valid
+ if self._session_expired:
+ raise ForceRetryError("Retry on session loss at top")
+
+ # Make sure that the request hasn't been canceled
+ if self.cancelled:
+ raise CancelledError("Semaphore cancelled")
+
+ # Get a list of the current potential lock holders. If they change,
+ # notify our wake_event object. This is used to unblock a blocking
+ # self._inner_acquire call.
+ children = self.client.get_children(self.path,
+ self._watch_lease_change)
+
+ # If there are leases available, acquire one
+ if len(children) < self.max_leases:
+ self.client.create(self.create_path, self.data, ephemeral=True)
+
+ # Check if our acquisition was successful or not. Update our state.
+ if self.client.exists(self.create_path):
+ self.is_acquired = True
+ else:
+ self.is_acquired = False
+
+ # Return current state
+ return self.is_acquired
+
+ def _watch_session(self, state):
+ if state == KazooState.LOST:
+ self._session_expired = True
+ self.wake_event.set()
+
+ # Return true to de-register
+ return True
+
+ def _best_effort_cleanup(self):
+ try:
+ self.client.delete(self.create_path)
+ except KazooException: # pragma: nocover
+ pass
+
+ def release(self):
+ """Release the lease immediately."""
+ return self.client.retry(self._inner_release)
+
+ def _inner_release(self):
+ if not self.is_acquired:
+ return False
+
+ try:
+ self.client.delete(self.create_path)
+ except NoNodeError: # pragma: nocover
+ pass
+ self.is_acquired = False
+ return True
+
+ def lease_holders(self):
+ """Return an unordered list of the current lease holders.
+
+ .. note::
+
+ If the lease holder did not set an identifier, it will
+ appear as a blank string.
+
+ """
+ if not self.client.exists(self.path):
+ return []
+
+ children = self.client.get_children(self.path)
+
+ lease_holders = []
+ for child in children:
+ try:
+ data, stat = self.client.get(self.path + "/" + child)
+ lease_holders.append(data.decode('utf-8'))
+ except NoNodeError: # pragma: nocover
+ pass
+ return lease_holders
+
+ def __enter__(self):
+ self.acquire()
+
+ def __exit__(self, exc_type, exc_value, traceback):
+ self.release()
diff --git a/slider-agent/src/main/python/kazoo/recipe/partitioner.py b/slider-agent/src/main/python/kazoo/recipe/partitioner.py
new file mode 100644
index 0000000..0d92661
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/recipe/partitioner.py
@@ -0,0 +1,378 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+"""Zookeeper Partitioner Implementation
+
+:Maintainer: None
+:Status: Unknown
+
+:class:`SetPartitioner` implements a partitioning scheme using
+Zookeeper for dividing up resources amongst members of a party.
+
+This is useful when there is a set of resources that should only be
+accessed by a single process at a time that multiple processes
+across a cluster might want to divide up.
+
+Example Use-Case
+----------------
+
+- Multiple workers across a cluster need to divide up a list of queues
+ so that no two workers own the same queue.
+
+"""
+import logging
+import os
+import socket
+from functools import partial
+
+from kazoo.exceptions import KazooException
+from kazoo.protocol.states import KazooState
+from kazoo.recipe.watchers import PatientChildrenWatch
+
+log = logging.getLogger(__name__)
+
+
+class PartitionState(object):
+ """High level partition state values
+
+ .. attribute:: ALLOCATING
+
+ The set needs to be partitioned, and may require an existing
+ partition set to be released before acquiring a new partition
+ of the set.
+
+ .. attribute:: ACQUIRED
+
+ The set has been partitioned and acquired.
+
+ .. attribute:: RELEASE
+
+ The set needs to be repartitioned, and the current partitions
+ must be released before a new allocation can be made.
+
+ .. attribute:: FAILURE
+
+ The set partition has failed. This occurs when the maximum
+ time to partition the set is exceeded or the Zookeeper session
+ is lost. The partitioner is unusable after this state and must
+ be recreated.
+
+ """
+ ALLOCATING = "ALLOCATING"
+ ACQUIRED = "ACQUIRED"
+ RELEASE = "RELEASE"
+ FAILURE = "FAILURE"
+
+
+class SetPartitioner(object):
+ """Partitions a set amongst members of a party
+
+ This class will partition a set amongst members of a party such
+ that each member will be given zero or more items of the set and
+ each set item will be given to a single member. When new members
+ enter or leave the party, the set will be re-partitioned amongst
+ the members.
+
+ When the :class:`SetPartitioner` enters the
+ :attr:`~PartitionState.FAILURE` state, it is unrecoverable
+ and a new :class:`SetPartitioner` should be created.
+
+ Example:
+
+ .. code-block:: python
+
+ from kazoo.client import KazooClient
+ client = KazooClient()
+
+ qp = client.SetPartitioner(
+ path='/work_queues', set=('queue-1', 'queue-2', 'queue-3'))
+
+ while 1:
+ if qp.failed:
+ raise Exception("Lost or unable to acquire partition")
+ elif qp.release:
+ qp.release_set()
+ elif qp.acquired:
+ for partition in qp:
+ # Do something with each partition
+ elif qp.allocating:
+ qp.wait_for_acquire()
+
+ **State Transitions**
+
+ When created, the :class:`SetPartitioner` enters the
+ :attr:`PartitionState.ALLOCATING` state.
+
+ :attr:`~PartitionState.ALLOCATING` ->
+ :attr:`~PartitionState.ACQUIRED`
+
+ Set was partitioned successfully, the partition list assigned
+ is accessible via list/iter methods or calling list() on the
+ :class:`SetPartitioner` instance.
+
+ :attr:`~PartitionState.ALLOCATING` ->
+ :attr:`~PartitionState.FAILURE`
+
+ Allocating the set failed either due to a Zookeeper session
+ expiration, or failure to acquire the items of the set within
+ the timeout period.
+
+ :attr:`~PartitionState.ACQUIRED` ->
+ :attr:`~PartitionState.RELEASE`
+
+ The members of the party have changed, and the set needs to be
+        repartitioned. :meth:`SetPartitioner.release_set` should be called
+ as soon as possible.
+
+ :attr:`~PartitionState.ACQUIRED` ->
+ :attr:`~PartitionState.FAILURE`
+
+ The current partition was lost due to a Zookeeper session
+ expiration.
+
+ :attr:`~PartitionState.RELEASE` ->
+ :attr:`~PartitionState.ALLOCATING`
+
+ The current partition was released and is being re-allocated.
+
+ """
+ def __init__(self, client, path, set, partition_func=None,
+ identifier=None, time_boundary=30):
+ """Create a :class:`~SetPartitioner` instance
+
+ :param client: A :class:`~kazoo.client.KazooClient` instance.
+ :param path: The partition path to use.
+ :param set: The set of items to partition.
+ :param partition_func: A function to use to decide how to
+ partition the set.
+ :param identifier: An identifier to use for this member of the
+ party when participating. Defaults to the
+ hostname + process id.
+ :param time_boundary: How long the party members must be stable
+ before allocation can complete.
+
+ """
+ self.state = PartitionState.ALLOCATING
+
+ self._client = client
+ self._path = path
+ self._set = set
+ self._partition_set = []
+ self._partition_func = partition_func or self._partitioner
+ self._identifier = identifier or '%s-%s' % (
+ socket.getfqdn(), os.getpid())
+ self._locks = []
+ self._lock_path = '/'.join([path, 'locks'])
+ self._party_path = '/'.join([path, 'party'])
+ self._time_boundary = time_boundary
+
+ self._acquire_event = client.handler.event_object()
+
+ # Create basic path nodes
+ client.ensure_path(path)
+ client.ensure_path(self._lock_path)
+ client.ensure_path(self._party_path)
+
+ # Join the party
+ self._party = client.ShallowParty(self._party_path,
+ identifier=self._identifier)
+ self._party.join()
+
+ self._was_allocated = False
+ self._state_change = client.handler.rlock_object()
+ client.add_listener(self._establish_sessionwatch)
+
+ # Now watch the party and set the callback on the async result
+ # so we know when we're ready
+ self._children_updated = False
+ self._child_watching(self._allocate_transition, async=True)
+
+ def __iter__(self):
+ """Return the partitions in this partition set"""
+ for partition in self._partition_set:
+ yield partition
+
+ @property
+ def failed(self):
+ """Corresponds to the :attr:`PartitionState.FAILURE` state"""
+ return self.state == PartitionState.FAILURE
+
+ @property
+ def release(self):
+ """Corresponds to the :attr:`PartitionState.RELEASE` state"""
+ return self.state == PartitionState.RELEASE
+
+ @property
+ def allocating(self):
+ """Corresponds to the :attr:`PartitionState.ALLOCATING`
+ state"""
+ return self.state == PartitionState.ALLOCATING
+
+ @property
+ def acquired(self):
+ """Corresponds to the :attr:`PartitionState.ACQUIRED` state"""
+ return self.state == PartitionState.ACQUIRED
+
+ def wait_for_acquire(self, timeout=30):
+ """Wait for the set to be partitioned and acquired
+
+ :param timeout: How long to wait before returning.
+ :type timeout: int
+
+ """
+ self._acquire_event.wait(timeout)
+
+ def release_set(self):
+ """Call to release the set
+
+ This method begins the step of allocating once the set has
+ been released.
+
+ """
+ self._release_locks()
+ if self._locks: # pragma: nocover
+ # This shouldn't happen, it means we couldn't release our
+ # locks, abort
+ self._fail_out()
+ return
+ else:
+ with self._state_change:
+ if self.failed:
+ return
+ self.state = PartitionState.ALLOCATING
+ self._child_watching(self._allocate_transition, async=True)
+
+ def finish(self):
+ """Call to release the set and leave the party"""
+ self._release_locks()
+ self._fail_out()
+
+ def _fail_out(self):
+ with self._state_change:
+ self.state = PartitionState.FAILURE
+ if self._party.participating:
+ try:
+ self._party.leave()
+ except KazooException: # pragma: nocover
+ pass
+
+ def _allocate_transition(self, result):
+ """Called when in allocating mode, and the children settled"""
+ # Did we get an exception waiting for children to settle?
+ if result.exception: # pragma: nocover
+ self._fail_out()
+ return
+
+ children, async_result = result.get()
+ self._children_updated = False
+
+ # Add a callback when children change on the async_result
+ def updated(result):
+ with self._state_change:
+ if self.acquired:
+ self.state = PartitionState.RELEASE
+ self._children_updated = True
+
+ async_result.rawlink(updated)
+
+ # Split up the set
+ self._partition_set = self._partition_func(
+ self._identifier, list(self._party), self._set)
+
+ # Proceed to acquire locks for the working set as needed
+ for member in self._partition_set:
+ if self._children_updated or self.failed:
+ # Still haven't settled down, release locks acquired
+ # so far and go back
+ return self._abort_lock_acquisition()
+
+ lock = self._client.Lock(self._lock_path + '/' +
+ str(member))
+ try:
+ lock.acquire()
+ except KazooException: # pragma: nocover
+ return self.finish()
+ self._locks.append(lock)
+
+ # All locks acquired! Time for state transition, make sure
+ # we didn't inadvertently get lost thus far
+ with self._state_change:
+ if self.failed: # pragma: nocover
+ return self.finish()
+ self.state = PartitionState.ACQUIRED
+ self._acquire_event.set()
+
+ def _release_locks(self):
+ """Attempt to completely remove all the locks"""
+ self._acquire_event.clear()
+ for lock in self._locks[:]:
+ try:
+ lock.release()
+ except KazooException: # pragma: nocover
+ # We proceed to remove as many as possible, and leave
+ # the ones we couldn't remove
+ pass
+ else:
+ self._locks.remove(lock)
+
+ def _abort_lock_acquisition(self):
+ """Called during lock acquisition if a party change occurs"""
+ self._partition_set = []
+ self._release_locks()
+ if self._locks:
+ # This shouldn't happen, it means we couldn't release our
+ # locks, abort
+ self._fail_out()
+ return
+ return self._child_watching(self._allocate_transition)
+
+ def _child_watching(self, func=None, async=False):
+ """Called when children are being watched to stabilize
+
+ This actually returns immediately, child watcher spins up a
+ new thread/greenlet and waits for it to stabilize before
+ any callbacks might run.
+
+ """
+ watcher = PatientChildrenWatch(self._client, self._party_path,
+ self._time_boundary)
+ asy = watcher.start()
+ if func is not None:
+ # We spin up the function in a separate thread/greenlet
+ # to ensure that the rawlink's it might use won't be
+ # blocked
+ if async:
+ func = partial(self._client.handler.spawn, func)
+ asy.rawlink(func)
+ return asy
+
+ def _establish_sessionwatch(self, state):
+ """Register ourself to listen for session events, we shut down
+ if we become lost"""
+ with self._state_change:
+ # Handle network partition: If connection gets suspended,
+ # change state to ALLOCATING if we had already ACQUIRED. This way
+ # the caller does not process the members since we could eventually
+            # lose the session and get repartitioned. If we got connected after a suspension
+ # it means we've not lost the session and still have our members. Hence,
+ # restore to ACQUIRED
+ if state == KazooState.SUSPENDED:
+ if self.state == PartitionState.ACQUIRED:
+ self._was_allocated = True
+ self.state = PartitionState.ALLOCATING
+ elif state == KazooState.CONNECTED:
+ if self._was_allocated:
+ self._was_allocated = False
+ self.state = PartitionState.ACQUIRED
+
+ if state == KazooState.LOST:
+ self._client.handler.spawn(self._fail_out)
+ return True
+
+ def _partitioner(self, identifier, members, partitions):
+ # Ensure consistent order of partitions/members
+ all_partitions = sorted(partitions)
+ workers = sorted(members)
+
+ i = workers.index(identifier)
+ # Now return the partition list starting at our location and
+ # skipping the other workers
+ return all_partitions[i::len(workers)]
diff --git a/slider-agent/src/main/python/kazoo/recipe/party.py b/slider-agent/src/main/python/kazoo/recipe/party.py
new file mode 100644
index 0000000..4fd873e
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/recipe/party.py
@@ -0,0 +1,119 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+"""Party
+
+:Maintainer: Ben Bangert <ben@groovie.org>
+:Status: Production
+
+A Zookeeper pool of party members. The :class:`Party` object can be
+used for determining members of a party.
+
+"""
+import uuid
+
+from kazoo.exceptions import NodeExistsError, NoNodeError
+
+
+class BaseParty(object):
+ """Base implementation of a party."""
+ def __init__(self, client, path, identifier=None):
+ """
+ :param client: A :class:`~kazoo.client.KazooClient` instance.
+ :param path: The party path to use.
+ :param identifier: An identifier to use for this member of the
+ party when participating.
+
+ """
+ self.client = client
+ self.path = path
+ self.data = str(identifier or "").encode('utf-8')
+ self.ensured_path = False
+ self.participating = False
+
+ def _ensure_parent(self):
+ if not self.ensured_path:
+ # make sure our parent node exists
+ self.client.ensure_path(self.path)
+ self.ensured_path = True
+
+ def join(self):
+ """Join the party"""
+ return self.client.retry(self._inner_join)
+
+ def _inner_join(self):
+ self._ensure_parent()
+ try:
+ self.client.create(self.create_path, self.data, ephemeral=True)
+ self.participating = True
+ except NodeExistsError:
+ # node was already created, perhaps we are recovering from a
+ # suspended connection
+ self.participating = True
+
+ def leave(self):
+ """Leave the party"""
+ self.participating = False
+ return self.client.retry(self._inner_leave)
+
+ def _inner_leave(self):
+ try:
+ self.client.delete(self.create_path)
+ except NoNodeError:
+ return False
+ return True
+
+ def __len__(self):
+ """Return a count of participating clients"""
+ self._ensure_parent()
+ return len(self._get_children())
+
+ def _get_children(self):
+ return self.client.retry(self.client.get_children, self.path)
+
+
+class Party(BaseParty):
+ """Simple pool of participating processes"""
+ _NODE_NAME = "__party__"
+
+ def __init__(self, client, path, identifier=None):
+ BaseParty.__init__(self, client, path, identifier=identifier)
+ self.node = uuid.uuid4().hex + self._NODE_NAME
+ self.create_path = self.path + "/" + self.node
+
+ def __iter__(self):
+ """Get a list of participating clients' data values"""
+ self._ensure_parent()
+ children = self._get_children()
+ for child in children:
+ try:
+ d, _ = self.client.retry(self.client.get, self.path +
+ "/" + child)
+ yield d.decode('utf-8')
+ except NoNodeError: # pragma: nocover
+ pass
+
+ def _get_children(self):
+ children = BaseParty._get_children(self)
+ return [c for c in children if self._NODE_NAME in c]
+
+
+class ShallowParty(BaseParty):
+ """Simple shallow pool of participating processes
+
+ This differs from the :class:`Party` as the identifier is used in
+ the name of the party node itself, rather than the data. This
+ places some restrictions on the length as it must be a valid
+ Zookeeper node (an alphanumeric string), but reduces the overhead
+ of getting a list of participants to a single Zookeeper call.
+
+ """
+ def __init__(self, client, path, identifier=None):
+ BaseParty.__init__(self, client, path, identifier=identifier)
+ self.node = '-'.join([uuid.uuid4().hex, self.data.decode('utf-8')])
+ self.create_path = self.path + "/" + self.node
+
+ def __iter__(self):
+ """Get a list of participating clients' identifiers"""
+ self._ensure_parent()
+ children = self._get_children()
+ for child in children:
+ yield child[child.find('-') + 1:]
diff --git a/slider-agent/src/main/python/kazoo/recipe/queue.py b/slider-agent/src/main/python/kazoo/recipe/queue.py
new file mode 100644
index 0000000..81289b9
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/recipe/queue.py
@@ -0,0 +1,322 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+"""Zookeeper based queue implementations.
+
+:Maintainer: None
+:Status: Unknown
+
+"""
+
+import uuid
+from kazoo.exceptions import NoNodeError, NodeExistsError
+from kazoo.retry import ForceRetryError
+from kazoo.protocol.states import EventType
+
+
+class BaseQueue(object):
+ """A common base class for queue implementations."""
+
+ def __init__(self, client, path):
+ """
+ :param client: A :class:`~kazoo.client.KazooClient` instance.
+ :param path: The queue path to use in ZooKeeper.
+ """
+ self.client = client
+ self.path = path
+ self._entries_path = path
+ self.structure_paths = (self.path, )
+ self.ensured_path = False
+
+ def _check_put_arguments(self, value, priority=100):
+ if not isinstance(value, bytes):
+ raise TypeError("value must be a byte string")
+ if not isinstance(priority, int):
+ raise TypeError("priority must be an int")
+ elif priority < 0 or priority > 999:
+ raise ValueError("priority must be between 0 and 999")
+
+ def _ensure_paths(self):
+ if not self.ensured_path:
+ # make sure our parent / internal structure nodes exists
+ for path in self.structure_paths:
+ self.client.ensure_path(path)
+ self.ensured_path = True
+
+ def __len__(self):
+ self._ensure_paths()
+ _, stat = self.client.retry(self.client.get, self._entries_path)
+ return stat.children_count
+
+
+class Queue(BaseQueue):
+ """A distributed queue with optional priority support.
+
+ This queue does not offer reliable consumption. An entry is removed
+ from the queue prior to being processed. So if an error occurs, the
+ consumer has to re-queue the item or it will be lost.
+
+ """
+
+ prefix = "entry-"
+
+ def __init__(self, client, path):
+ """
+ :param client: A :class:`~kazoo.client.KazooClient` instance.
+ :param path: The queue path to use in ZooKeeper.
+ """
+ super(Queue, self).__init__(client, path)
+ self._children = []
+
+ def __len__(self):
+ """Return queue size."""
+ return super(Queue, self).__len__()
+
+ def get(self):
+ """
+ Get item data and remove an item from the queue.
+
+ :returns: Item data or None.
+ :rtype: bytes
+ """
+ self._ensure_paths()
+ return self.client.retry(self._inner_get)
+
+ def _inner_get(self):
+ if not self._children:
+ self._children = self.client.retry(self.client.get_children, self.path)
+ self._children = sorted(self._children)
+ if not self._children:
+ return None
+ name = self._children[0]
+ try:
+ data, stat = self.client.get(self.path + "/" + name)
+ except NoNodeError: # pragma: nocover
+ # the first node has vanished in the meantime, try to
+ # get another one
+ raise ForceRetryError()
+ try:
+ self.client.delete(self.path + "/" + name)
+ except NoNodeError: # pragma: nocover
+ # we were able to get the data but someone else has removed
+ # the node in the meantime. consider the item as processed
+ # by the other process
+ raise ForceRetryError()
+ self._children.pop(0)
+ return data
+
+ def put(self, value, priority=100):
+ """Put an item into the queue.
+
+ :param value: Byte string to put into the queue.
+ :param priority:
+ An optional priority as an integer with at most 3 digits.
+ Lower values signify higher priority.
+ """
+ self._check_put_arguments(value, priority)
+ self._ensure_paths()
+ path = '{path}/{prefix}{priority:03d}-'.format(
+ path=self.path, prefix=self.prefix, priority=priority)
+ self.client.create(path, value, sequence=True)
+
+
+class LockingQueue(BaseQueue):
+ """A distributed queue with priority and locking support.
+
+ Upon retrieving an entry from the queue, the entry gets locked with an
+ ephemeral node (instead of deleted). If an error occurs, this lock gets
+ released so that others could retake the entry. This adds a little penalty
+ as compared to :class:`Queue` implementation.
+
+ The user should call the :meth:`LockingQueue.get` method first to lock and
+ retrieve the next entry. When finished processing the entry, a user should
+ call the :meth:`LockingQueue.consume` method that will remove the entry
+ from the queue.
+
+ This queue will not track connection status with ZooKeeper. If a node locks
+ an element, then loses connection with ZooKeeper and later reconnects, the
+ lock will probably be removed by Zookeeper in the meantime, but a node
+ would still think that it holds a lock. The user should check the
+ connection status with Zookeeper or call :meth:`LockingQueue.holds_lock`
+ method that will check if a node still holds the lock.
+
+ .. note::
+ :class:`LockingQueue` requires ZooKeeper 3.4 or above, since it is
+ using transactions.
+ """
+ lock = "/taken"
+ entries = "/entries"
+ entry = "entry"
+
+ def __init__(self, client, path):
+ """
+ :param client: A :class:`~kazoo.client.KazooClient` instance.
+ :param path: The queue path to use in ZooKeeper.
+ """
+ super(LockingQueue, self).__init__(client, path)
+ self.id = uuid.uuid4().hex.encode()
+ self.processing_element = None
+ self._lock_path = self.path + self.lock
+ self._entries_path = self.path + self.entries
+ self.structure_paths = (self._lock_path, self._entries_path)
+
+ def __len__(self):
+ """Returns the current length of the queue.
+
+ :returns: queue size (includes locked entries count).
+ """
+ return super(LockingQueue, self).__len__()
+
+ def put(self, value, priority=100):
+ """Put an entry into the queue.
+
+ :param value: Byte string to put into the queue.
+ :param priority:
+ An optional priority as an integer with at most 3 digits.
+ Lower values signify higher priority.
+
+ """
+ self._check_put_arguments(value, priority)
+ self._ensure_paths()
+
+ self.client.create(
+ "{path}/{prefix}-{priority:03d}-".format(
+ path=self._entries_path,
+ prefix=self.entry,
+ priority=priority),
+ value, sequence=True)
+
+ def put_all(self, values, priority=100):
+ """Put several entries into the queue. The action only succeeds
+    if all entries were put into the queue.
+
+ :param values: A list of values to put into the queue.
+ :param priority:
+ An optional priority as an integer with at most 3 digits.
+ Lower values signify higher priority.
+
+ """
+ if not isinstance(values, list):
+ raise TypeError("values must be a list of byte strings")
+ if not isinstance(priority, int):
+ raise TypeError("priority must be an int")
+ elif priority < 0 or priority > 999:
+ raise ValueError("priority must be between 0 and 999")
+ self._ensure_paths()
+
+ with self.client.transaction() as transaction:
+ for value in values:
+ if not isinstance(value, bytes):
+ raise TypeError("value must be a byte string")
+ transaction.create(
+ "{path}/{prefix}-{priority:03d}-".format(
+ path=self._entries_path,
+ prefix=self.entry,
+ priority=priority),
+ value, sequence=True)
+
+ def get(self, timeout=None):
+ """Locks and gets an entry from the queue. If a previously got entry
+ was not consumed, this method will return that entry.
+
+ :param timeout:
+ Maximum waiting time in seconds. If None then it will wait
+        until an entry appears in the queue.
+ :returns: A locked entry value or None if the timeout was reached.
+ :rtype: bytes
+ """
+ self._ensure_paths()
+ if not self.processing_element is None:
+ return self.processing_element[1]
+ else:
+ return self._inner_get(timeout)
+
+ def holds_lock(self):
+ """Checks if a node still holds the lock.
+
+ :returns: True if a node still holds the lock, False otherwise.
+ :rtype: bool
+ """
+ if self.processing_element is None:
+ return False
+ lock_id, _ = self.processing_element
+ lock_path = "{path}/{id}".format(path=self._lock_path, id=lock_id)
+ self.client.sync(lock_path)
+ value, stat = self.client.retry(self.client.get, lock_path)
+ return value == self.id
+
+ def consume(self):
+ """Removes a currently processing entry from the queue.
+
+ :returns: True if element was removed successfully, False otherwise.
+ :rtype: bool
+ """
+ if not self.processing_element is None and self.holds_lock:
+ id_, value = self.processing_element
+ with self.client.transaction() as transaction:
+ transaction.delete("{path}/{id}".format(
+ path=self._entries_path,
+ id=id_))
+ transaction.delete("{path}/{id}".format(
+ path=self._lock_path,
+ id=id_))
+ self.processing_element = None
+ return True
+ else:
+ return False
+
+ def _inner_get(self, timeout):
+ flag = self.client.handler.event_object()
+ lock = self.client.handler.lock_object()
+ canceled = False
+ value = []
+
+ def check_for_updates(event):
+ if not event is None and event.type != EventType.CHILD:
+ return
+ with lock:
+ if canceled or flag.isSet():
+ return
+ values = self.client.retry(self.client.get_children,
+ self._entries_path,
+ check_for_updates)
+ taken = self.client.retry(self.client.get_children,
+ self._lock_path,
+ check_for_updates)
+ available = self._filter_locked(values, taken)
+ if len(available) > 0:
+ ret = self._take(available[0])
+ if not ret is None:
+ # By this time, no one took the task
+ value.append(ret)
+ flag.set()
+
+ check_for_updates(None)
+ retVal = None
+ flag.wait(timeout)
+ with lock:
+ canceled = True
+ if len(value) > 0:
+ # We successfully locked an entry
+ self.processing_element = value[0]
+ retVal = value[0][1]
+ return retVal
+
+ def _filter_locked(self, values, taken):
+ taken = set(taken)
+ available = sorted(values)
+ return (available if len(taken) == 0 else
+ [x for x in available if x not in taken])
+
+ def _take(self, id_):
+ try:
+ self.client.create(
+ "{path}/{id}".format(
+ path=self._lock_path,
+ id=id_),
+ self.id,
+ ephemeral=True)
+ value, stat = self.client.retry(self.client.get,
+ "{path}/{id}".format(path=self._entries_path, id=id_))
+ except (NoNodeError, NodeExistsError):
+ # Item is already consumed or locked
+ return None
+ return (id_, value)
diff --git a/slider-agent/src/main/python/kazoo/recipe/watchers.py b/slider-agent/src/main/python/kazoo/recipe/watchers.py
new file mode 100644
index 0000000..f9667f8
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/recipe/watchers.py
@@ -0,0 +1,420 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+"""Higher level child and data watching API's.
+
+:Maintainer: Ben Bangert <ben@groovie.org>
+:Status: Production
+
+.. note::
+
+ :ref:`DataWatch` and :ref:`ChildrenWatch` may only handle a single
+ function, attempts to associate a single instance with multiple functions
+ will result in an exception being thrown.
+
+"""
+import logging
+import time
+import warnings
+from functools import partial, wraps
+
+from kazoo.retry import KazooRetry
+from kazoo.exceptions import (
+ ConnectionClosedError,
+ NoNodeError,
+ KazooException
+)
+from kazoo.protocol.states import KazooState
+
+log = logging.getLogger(__name__)
+
+
+_STOP_WATCHING = object()
+
+
+def _ignore_closed(func):
+ @wraps(func)
+ def wrapper(*args, **kwargs):
+ try:
+ return func(*args, **kwargs)
+ except ConnectionClosedError:
+ pass
+ return wrapper
+
+
+class DataWatch(object):
+ """Watches a node for data updates and calls the specified
+ function each time it changes
+
+ The function will also be called the very first time its
+ registered to get the data.
+
+ Returning `False` from the registered function will disable future
+ data change calls. If the client connection is closed (using the
+ close command), the DataWatch will no longer get updates.
+
+ If the function supplied takes three arguments, then the third one
+ will be a :class:`~kazoo.protocol.states.WatchedEvent`. It will
+ only be set if the change to the data occurs as a result of the
+ server notifying the watch that there has been a change. Events
+ like reconnection or the first call will not include an event.
+
+ If the node does not exist, then the function will be called with
+ ``None`` for all values.
+
+ .. tip::
+
+ Because :class:`DataWatch` can watch nodes that don't exist, it
+ can be used alternatively as a higher-level Exists watcher that
+ survives reconnections and session loss.
+
+ Example with client:
+
+ .. code-block:: python
+
+ @client.DataWatch('/path/to/watch')
+ def my_func(data, stat):
+ print("Data is %s" % data)
+ print("Version is %s" % stat.version)
+
+ # Above function is called immediately and prints
+
+ # Or if you want the event object
+ @client.DataWatch('/path/to/watch')
+ def my_func(data, stat, event):
+ print("Data is %s" % data)
+ print("Version is %s" % stat.version)
+ print("Event is %s" % event)
+
+ .. versionchanged:: 1.2
+
+ DataWatch now ignores additional arguments that were previously
+ passed to it and warns that they are no longer respected.
+
+ """
+ def __init__(self, client, path, func=None, *args, **kwargs):
+ """Create a data watcher for a path
+
+ :param client: A zookeeper client.
+ :type client: :class:`~kazoo.client.KazooClient`
+ :param path: The path to watch for data changes on.
+ :type path: str
+ :param func: Function to call initially and every time the
+ node changes. `func` will be called with a
+ tuple, the value of the node and a
+ :class:`~kazoo.client.ZnodeStat` instance.
+ :type func: callable
+
+ """
+ self._client = client
+ self._path = path
+ self._func = func
+ self._stopped = False
+ self._run_lock = client.handler.lock_object()
+ self._version = None
+ self._retry = KazooRetry(max_tries=None,
+ sleep_func=client.handler.sleep_func)
+ self._include_event = None
+ self._ever_called = False
+ self._used = False
+
+ if args or kwargs:
+ warnings.warn('Passing additional arguments to DataWatch is'
+ ' deprecated. ignore_missing_node is now assumed '
+ ' to be True by default, and the event will be '
+ ' sent if the function can handle receiving it',
+ DeprecationWarning, stacklevel=2)
+
+ # Register our session listener if we're going to resume
+ # across session losses
+ if func is not None:
+ self._used = True
+ self._client.add_listener(self._session_watcher)
+ self._get_data()
+
+ def __call__(self, func):
+ """Callable version for use as a decorator
+
+ :param func: Function to call initially and every time the
+ data changes. `func` will be called with a
+ tuple, the value of the node and a
+ :class:`~kazoo.client.ZnodeStat` instance.
+ :type func: callable
+
+ """
+ if self._used:
+ raise KazooException(
+ "A function has already been associated with this "
+ "DataWatch instance.")
+
+ self._func = func
+
+ self._used = True
+ self._client.add_listener(self._session_watcher)
+ self._get_data()
+ return func
+
+ def _log_func_exception(self, data, stat, event=None):
+ try:
+ # For backwards compatibility, don't send event to the
+ # callback unless the send_event is set in constructor
+ if not self._ever_called:
+ self._ever_called = True
+ try:
+ result = self._func(data, stat, event)
+ except TypeError:
+ result = self._func(data, stat)
+ if result is False:
+ self._stopped = True
+ self._client.remove_listener(self._session_watcher)
+ except Exception as exc:
+ log.exception(exc)
+ raise
+
+ @_ignore_closed
+ def _get_data(self, event=None):
+ # Ensure this runs one at a time, possible because the session
+ # watcher may trigger a run
+ with self._run_lock:
+ if self._stopped:
+ return
+
+ initial_version = self._version
+
+ try:
+ data, stat = self._retry(self._client.get,
+ self._path, self._watcher)
+ except NoNodeError:
+ data = None
+
+ # This will set 'stat' to None if the node does not yet
+ # exist.
+ stat = self._retry(self._client.exists, self._path,
+ self._watcher)
+ if stat:
+ self._client.handler.spawn(self._get_data)
+ return
+
+ # No node data, clear out version
+ if stat is None:
+ self._version = None
+ else:
+ self._version = stat.mzxid
+
+ # Call our function if its the first time ever, or if the
+ # version has changed
+ if initial_version != self._version or not self._ever_called:
+ self._log_func_exception(data, stat, event)
+
+ def _watcher(self, event):
+ self._get_data(event=event)
+
+ def _set_watch(self, state):
+ with self._run_lock:
+ self._watch_established = state
+
+ def _session_watcher(self, state):
+ if state == KazooState.CONNECTED:
+ self._client.handler.spawn(self._get_data)
+
+
+class ChildrenWatch(object):
+ """Watches a node for children updates and calls the specified
+ function each time it changes
+
+ The function will also be called the very first time its
+ registered to get children.
+
+ Returning `False` from the registered function will disable future
+ children change calls. If the client connection is closed (using
+ the close command), the ChildrenWatch will no longer get updates.
+
+    If send_event=True in __init__, then the function will always be
+ called with second parameter, ``event``. Upon initial call or when
+ recovering a lost session the ``event`` is always ``None``.
+    Otherwise it's a :class:`~kazoo.protocol.states.WatchedEvent`
+ instance.
+
+ Example with client:
+
+ .. code-block:: python
+
+ @client.ChildrenWatch('/path/to/watch')
+ def my_func(children):
+ print "Children are %s" % children
+
+ # Above function is called immediately and prints children
+
+ """
+ def __init__(self, client, path, func=None,
+ allow_session_lost=True, send_event=False):
+ """Create a children watcher for a path
+
+ :param client: A zookeeper client.
+ :type client: :class:`~kazoo.client.KazooClient`
+ :param path: The path to watch for children on.
+ :type path: str
+ :param func: Function to call initially and every time the
+ children change. `func` will be called with a
+ single argument, the list of children.
+ :type func: callable
+ :param allow_session_lost: Whether the watch should be
+ re-registered if the zookeeper
+ session is lost.
+ :type allow_session_lost: bool
+ :type send_event: bool
+ :param send_event: Whether the function should be passed the
+ event sent by ZooKeeper or None upon
+ initialization (see class documentation)
+
+ The path must already exist for the children watcher to
+ run.
+
+ """
+ self._client = client
+ self._path = path
+ self._func = func
+ self._send_event = send_event
+ self._stopped = False
+ self._watch_established = False
+ self._allow_session_lost = allow_session_lost
+ self._run_lock = client.handler.lock_object()
+ self._prior_children = None
+ self._used = False
+
+ # Register our session listener if we're going to resume
+ # across session losses
+ if func is not None:
+ self._used = True
+ if allow_session_lost:
+ self._client.add_listener(self._session_watcher)
+ self._get_children()
+
+ def __call__(self, func):
+ """Callable version for use as a decorator
+
+ :param func: Function to call initially and every time the
+ children change. `func` will be called with a
+ single argument, the list of children.
+ :type func: callable
+
+ """
+ if self._used:
+ raise KazooException(
+ "A function has already been associated with this "
+ "ChildrenWatch instance.")
+
+ self._func = func
+
+ self._used = True
+ if self._allow_session_lost:
+ self._client.add_listener(self._session_watcher)
+ self._get_children()
+ return func
+
+ @_ignore_closed
+ def _get_children(self, event=None):
+ with self._run_lock: # Ensure this runs one at a time
+ if self._stopped:
+ return
+
+ children = self._client.retry(self._client.get_children,
+ self._path, self._watcher)
+ if not self._watch_established:
+ self._watch_established = True
+
+ if self._prior_children is not None and \
+ self._prior_children == children:
+ return
+
+ self._prior_children = children
+
+ try:
+ if self._send_event:
+ result = self._func(children, event)
+ else:
+ result = self._func(children)
+ if result is False:
+ self._stopped = True
+ except Exception as exc:
+ log.exception(exc)
+ raise
+
+ def _watcher(self, event):
+ self._get_children(event)
+
+ def _session_watcher(self, state):
+ if state in (KazooState.LOST, KazooState.SUSPENDED):
+ self._watch_established = False
+ elif state == KazooState.CONNECTED and \
+ not self._watch_established and not self._stopped:
+ self._client.handler.spawn(self._get_children)
+
+
+class PatientChildrenWatch(object):
+ """Patient Children Watch that returns values after the children
+ of a node don't change for a period of time
+
+ A separate watcher for the children of a node, that ignores
+ changes within a boundary time and sets the result only when the
+ boundary time has elapsed with no children changes.
+
+ Example::
+
+ watcher = PatientChildrenWatch(client, '/some/path',
+ time_boundary=5)
+ async_object = watcher.start()
+
+ # Blocks until the children have not changed for time boundary
+ # (5 in this case) seconds, returns children list and an
+ # async_result that will be set if the children change in the
+ # future
+ children, child_async = async_object.get()
+
+ .. note::
+
+ This Watch is different from :class:`DataWatch` and
+ :class:`ChildrenWatch` as it only returns once, does not take
+ a function that is called, and provides an
+ :class:`~kazoo.interfaces.IAsyncResult` object that can be
+ checked to see if the children have changed later.
+
+ """
+ def __init__(self, client, path, time_boundary=30):
+ self.client = client
+ self.path = path
+ self.children = []
+ self.time_boundary = time_boundary
+ # Set by the watch callback whenever the children change during
+ # the quiet-period sleep.
+ self.children_changed = client.handler.event_object()
+
+ def start(self):
+ """Begin the watching process asynchronously
+
+ :returns: An :class:`~kazoo.interfaces.IAsyncResult` instance
+ that will be set when no change has occurred to the
+ children for time boundary seconds.
+
+ """
+ self.asy = asy = self.client.handler.async_result()
+ self.client.handler.spawn(self._inner_start)
+ return asy
+
+ def _inner_start(self):
+ # Loop: fetch children, sleep for the boundary, and repeat until
+ # an entire boundary passes with no change event.
+ try:
+ while True:
+ async_result = self.client.handler.async_result()
+ self.children = self.client.retry(
+ self.client.get_children, self.path,
+ partial(self._children_watcher, async_result))
+ self.client.handler.sleep_func(self.time_boundary)
+
+ if self.children_changed.is_set():
+ self.children_changed.clear()
+ else:
+ break
+
+ self.asy.set((self.children, async_result))
+ except Exception as exc:
+ self.asy.set_exception(exc)
+
+ # NOTE(review): 'async' became a reserved word in Python 3.7; this
+ # parameter must be renamed if this code is ever ported to Python 3.
+ def _children_watcher(self, async, event):
+ self.children_changed.set()
+ async.set(time.time())
diff --git a/slider-agent/src/main/python/kazoo/retry.py b/slider-agent/src/main/python/kazoo/retry.py
new file mode 100644
index 0000000..229b99d
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/retry.py
@@ -0,0 +1,151 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+import logging
+import random
+import time
+
+from kazoo.exceptions import (
+ ConnectionClosedError,
+ ConnectionLoss,
+ KazooException,
+ OperationTimeoutError,
+ SessionExpiredError,
+)
+
+log = logging.getLogger(__name__)
+
+
+class ForceRetryError(Exception):
+ """Raised when some recipe logic wants to force a retry."""
+ # Intentionally a plain Exception (not KazooException): it is a
+ # control-flow signal consumed by KazooRetry, not a client error.
+
+
+class RetryFailedError(KazooException):
+ """Raised when retrying an operation ultimately failed, after
+ retrying the maximum number of attempts.
+ """
+
+
+# NOTE(review): this shadows the Python 3 builtin InterruptedError;
+# callers must catch it via this module, not the builtin.
+class InterruptedError(RetryFailedError):
+ """Raised when the retry is forcibly interrupted by the interrupt
+ function"""
+
+
+class KazooRetry(object):
+ """Helper for retrying a method in the face of retry-able
+ exceptions"""
+ # Exceptions that always warrant another attempt.
+ RETRY_EXCEPTIONS = (
+ ConnectionLoss,
+ OperationTimeoutError,
+ ForceRetryError
+ )
+
+ # Only retried when ignore_expire=True.
+ EXPIRED_EXCEPTIONS = (
+ SessionExpiredError,
+ )
+
+ def __init__(self, max_tries=1, delay=0.1, backoff=2, max_jitter=0.8,
+ max_delay=3600, ignore_expire=True, sleep_func=time.sleep,
+ deadline=None, interrupt=None):
+ """Create a :class:`KazooRetry` instance for retrying function
+ calls
+
+ :param max_tries: How many times to retry the command. -1 means
+ infinite tries.
+ :param delay: Initial delay between retry attempts.
+ :param backoff: Backoff multiplier between retry attempts.
+ Defaults to 2 for exponential backoff.
+ :param max_jitter: Additional max jitter period to wait between
+ retry attempts to avoid slamming the server.
+ :param max_delay: Maximum delay in seconds, regardless of other
+ backoff settings. Defaults to one hour.
+ :param ignore_expire:
+ Whether a session expiration should be ignored and treated
+ as a retry-able command.
+ :param sleep_func: Function used to sleep between attempts
+ (defaults to :func:`time.sleep`).
+ :param deadline: Total time budget in seconds across all
+ attempts; exceeded budget raises
+ :class:`RetryFailedError`. ``None`` disables it.
+ :param interrupt:
+ Function that will be called with no args that may return
+ True if the retry should be ceased immediately. This will
+ be called no more than every 0.1 seconds during a wait
+ between retries.
+
+ """
+ self.max_tries = max_tries
+ self.delay = delay
+ self.backoff = backoff
+ # Jitter is stored in hundredths of a second for randint().
+ self.max_jitter = int(max_jitter * 100)
+ self.max_delay = float(max_delay)
+ self._attempts = 0
+ self._cur_delay = delay
+ self.deadline = deadline
+ self._cur_stoptime = None
+ self.sleep_func = sleep_func
+ self.retry_exceptions = self.RETRY_EXCEPTIONS
+ self.interrupt = interrupt
+ if ignore_expire:
+ self.retry_exceptions += self.EXPIRED_EXCEPTIONS
+
+ def reset(self):
+ """Reset the attempt counter"""
+ self._attempts = 0
+ self._cur_delay = self.delay
+ self._cur_stoptime = None
+
+ def copy(self):
+ """Return a clone of this retry manager"""
+ # max_jitter is stored scaled by 100, so divide on the way out.
+ obj = KazooRetry(max_tries=self.max_tries,
+ delay=self.delay,
+ backoff=self.backoff,
+ max_jitter=self.max_jitter / 100.0,
+ max_delay=self.max_delay,
+ sleep_func=self.sleep_func,
+ deadline=self.deadline,
+ interrupt=self.interrupt)
+ obj.retry_exceptions = self.retry_exceptions
+ return obj
+
+ def __call__(self, func, *args, **kwargs):
+ """Call a function with arguments until it completes without
+ throwing a Kazoo exception
+
+ :param func: Function to call
+ :param args: Positional arguments to call the function with
+ :params kwargs: Keyword arguments to call the function with
+
+ The function will be called until it doesn't throw one of the
+ retryable exceptions (ConnectionLoss, OperationTimeout, or
+ ForceRetryError), and optionally retrying on session
+ expiration.
+
+ """
+ self.reset()
+
+ while True:
+ try:
+ if self.deadline is not None and self._cur_stoptime is None:
+ self._cur_stoptime = time.time() + self.deadline
+ return func(*args, **kwargs)
+ except ConnectionClosedError:
+ # A deliberately closed client is never retried.
+ raise
+ except self.retry_exceptions:
+ # Note: max_tries == -1 means infinite tries.
+ if self._attempts == self.max_tries:
+ raise RetryFailedError("Too many retry attempts")
+ self._attempts += 1
+ sleeptime = self._cur_delay + (random.randint(0, self.max_jitter) / 100.0)
+
+ if self._cur_stoptime is not None and time.time() + sleeptime >= self._cur_stoptime:
+ raise RetryFailedError("Exceeded retry deadline")
+
+ if self.interrupt:
+ while sleeptime > 0:
+ # Break the time period down and sleep for no longer than
+ # 0.1 before calling the interrupt
+ if sleeptime < 0.1:
+ self.sleep_func(sleeptime)
+ sleeptime -= sleeptime
+ else:
+ self.sleep_func(0.1)
+ sleeptime -= 0.1
+ if self.interrupt():
+ raise InterruptedError()
+ else:
+ self.sleep_func(sleeptime)
+ # Exponential backoff, capped at max_delay.
+ self._cur_delay = min(self._cur_delay * self.backoff, self.max_delay)
diff --git a/slider-agent/src/main/python/kazoo/security.py b/slider-agent/src/main/python/kazoo/security.py
new file mode 100644
index 0000000..4532489
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/security.py
@@ -0,0 +1,139 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+"""Kazoo Security"""
+from base64 import b64encode
+from collections import namedtuple
+import hashlib
+
+
+# Represents a Zookeeper ID: an auth scheme name (e.g. 'world', 'digest')
+# plus the scheme-specific identity string.
+Id = namedtuple('Id', 'scheme id')
+
+
+class ACL(namedtuple('ACL', 'perms id')):
+ """An ACL for a Zookeeper Node
+
+ An ACL object is created by using an :class:`Id` object along with
+ a :class:`Permissions` setting. For convenience,
+ :meth:`make_digest_acl` should be used to create an ACL object with
+ the desired scheme, id, and permissions.
+
+ """
+ @property
+ def acl_list(self):
+ # Human-readable permission names for this ACL's perms bitmask.
+ perms = []
+ if self.perms & Permissions.ALL == Permissions.ALL:
+ # ALL subsumes every other bit; return early with just 'ALL'.
+ perms.append('ALL')
+ return perms
+ if self.perms & Permissions.READ == Permissions.READ:
+ perms.append('READ')
+ if self.perms & Permissions.WRITE == Permissions.WRITE:
+ perms.append('WRITE')
+ if self.perms & Permissions.CREATE == Permissions.CREATE:
+ perms.append('CREATE')
+ if self.perms & Permissions.DELETE == Permissions.DELETE:
+ perms.append('DELETE')
+ if self.perms & Permissions.ADMIN == Permissions.ADMIN:
+ perms.append('ADMIN')
+ return perms
+
+ def __repr__(self):
+ return 'ACL(perms=%r, acl_list=%s, id=%r)' % (
+ self.perms, self.acl_list, self.id)
+
+
+class Permissions(object):
+ # ZooKeeper permission bit flags; ALL is the OR of the other five.
+ READ = 1
+ WRITE = 2
+ CREATE = 4
+ DELETE = 8
+ ADMIN = 16
+ ALL = 31
+
+
+# Shortcuts for common Ids. 'UNSAFE' marks identities granting access
+# to any connected client ('world'/'anyone').
+ANYONE_ID_UNSAFE = Id('world', 'anyone')
+AUTH_IDS = Id('auth', '')
+
+# Shortcuts for common ACLs
+OPEN_ACL_UNSAFE = [ACL(Permissions.ALL, ANYONE_ID_UNSAFE)]
+CREATOR_ALL_ACL = [ACL(Permissions.ALL, AUTH_IDS)]
+READ_ACL_UNSAFE = [ACL(Permissions.READ, ANYONE_ID_UNSAFE)]
+
+
+def make_digest_acl_credential(username, password):
+ """Create a SHA1 digest credential in 'username:base64(sha1)' form."""
+ credential = username.encode('utf-8') + b":" + password.encode('utf-8')
+ cred_hash = b64encode(hashlib.sha1(credential).digest()).strip()
+ return username + ":" + cred_hash.decode('utf-8')
+
+
+def make_acl(scheme, credential, read=False, write=False,
+ create=False, delete=False, admin=False, all=False):
+ """Given a scheme and credential, return an :class:`ACL` object
+ appropriate for use with Kazoo.
+
+ :param scheme: The scheme to use. I.e. `digest`.
+ :param credential:
+ A colon separated username, password. The password should be
+ hashed with the `scheme` specified. The
+ :meth:`make_digest_acl_credential` method will create and
+ return a credential appropriate for use with the `digest`
+ scheme.
+ :param read: Read permission.
+ :type read: bool
+ :param write: Write permission.
+ :type write: bool
+ :param create: Create permission.
+ :type create: bool
+ :param delete: Delete permission.
+ :type delete: bool
+ :param admin: Admin permission.
+ :type admin: bool
+ :param all: All permissions.
+ :type all: bool
+
+ :rtype: :class:`ACL`
+
+ """
+ # 'all' wins outright; otherwise OR together the requested bits.
+ if all:
+ permissions = Permissions.ALL
+ else:
+ permissions = 0
+ if read:
+ permissions |= Permissions.READ
+ if write:
+ permissions |= Permissions.WRITE
+ if create:
+ permissions |= Permissions.CREATE
+ if delete:
+ permissions |= Permissions.DELETE
+ if admin:
+ permissions |= Permissions.ADMIN
+ return ACL(permissions, Id(scheme, credential))
+
+
+def make_digest_acl(username, password, read=False, write=False,
+ create=False, delete=False, admin=False, all=False):
+ """Create a digest ACL for Zookeeper with the given permissions
+
+ This method combines :meth:`make_digest_acl_credential` and
+ :meth:`make_acl` to create an :class:`ACL` object appropriate for
+ use with Kazoo's ACL methods.
+
+ :param username: Username to use for the ACL.
+ :param password: A plain-text password to hash.
+ :param read: Read permission.
+ :type read: bool
+ :param write: Write permission.
+ :type write: bool
+ :param create: Create permission.
+ :type create: bool
+ :param delete: Delete permission.
+ :type delete: bool
+ :param admin: Admin permission.
+ :type admin: bool
+ :param all: All permissions.
+ :type all: bool
+
+ :rtype: :class:`ACL`
+
+ """
+ cred = make_digest_acl_credential(username, password)
+ return make_acl("digest", cred, read=read, write=write, create=create,
+ delete=delete, admin=admin, all=all)
diff --git a/slider-agent/src/main/python/kazoo/testing/__init__.py b/slider-agent/src/main/python/kazoo/testing/__init__.py
new file mode 100644
index 0000000..660546b
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/testing/__init__.py
@@ -0,0 +1,6 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+from kazoo.testing.harness import KazooTestCase
+from kazoo.testing.harness import KazooTestHarness
+
+
+__all__ = ('KazooTestHarness', 'KazooTestCase', )
diff --git a/slider-agent/src/main/python/kazoo/testing/common.py b/slider-agent/src/main/python/kazoo/testing/common.py
new file mode 100644
index 0000000..b497a8e
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/testing/common.py
@@ -0,0 +1,284 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+#
+# Copyright (C) 2010-2011, 2011 Canonical Ltd. All Rights Reserved
+#
+# This file was originally taken from txzookeeper and modified later.
+#
+# Authors:
+# Kapil Thangavelu and the Kazoo team
+#
+# txzookeeper is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# txzookeeper is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with txzookeeper. If not, see <http://www.gnu.org/licenses/>.
+
+
+import code
+import os
+import os.path
+import shutil
+import signal
+import subprocess
+import tempfile
+import traceback
+
+from itertools import chain
+from collections import namedtuple
+from glob import glob
+
+
+def debug(sig, frame):
+ """Interrupt running process, and provide a python prompt for
+ interactive debugging."""
+ d = {'_frame': frame} # Allow access to frame object.
+ d.update(frame.f_globals) # Unless shadowed by global
+ d.update(frame.f_locals)
+
+ i = code.InteractiveConsole(d)
+ # NOTE(review): 'recieved' typo lives in the interactive banner string;
+ # left as-is here since changing runtime strings is out of scope.
+ message = "Signal recieved : entering python shell.\nTraceback:\n"
+ message += ''.join(traceback.format_stack(frame))
+ i.interact(message)
+
+
+def listen():
+ # Install the interactive-debug handler on SIGUSR1 (POSIX only).
+ if os.name != 'nt': # SIGUSR1 is not supported on Windows
+ signal.signal(signal.SIGUSR1, debug) # Register handler
+# Module import side effect: handler is registered immediately.
+listen()
+
+
+def to_java_compatible_path(path):
+ # Java accepts forward slashes on Windows; normalize backslashes.
+ if os.name == 'nt':
+ path = path.replace('\\', '/')
+ return path
+
+# Per-server quorum configuration: myid plus the three ZooKeeper ports.
+ServerInfo = namedtuple(
+ "ServerInfo", "server_id client_port election_port leader_port")
+
+
+class ManagedZooKeeper(object):
+ """Class to manage the running of a ZooKeeper instance for testing.
+
+ Note: no attempt is made to probe the ZooKeeper instance is
+ actually available, or that the selected port is free. In the
+ future, we may want to do that, especially when run in a
+ Hudson/Buildbot context, to ensure more test robustness."""
+
+ def __init__(self, software_path, server_info, peers=(), classpath=None):
+ """Define the ZooKeeper test instance.
+
+ @param software_path: The path to the ZooKeeper install
+ @param server_info: ServerInfo tuple for this instance (ports, id)
+ @param peers: ServerInfo tuples for other quorum members, if any
+ @param classpath: Optional explicit Java classpath override
+ """
+ self.install_path = software_path
+ self._classpath = classpath
+ self.server_info = server_info
+ self.host = "127.0.0.1"
+ self.peers = peers
+ self.working_path = tempfile.mkdtemp()
+ self._running = False
+
+ def run(self):
+ """Run the ZooKeeper instance under a temporary directory.
+
+ Writes ZK log messages to zookeeper.log in the current directory.
+ """
+ # Idempotent: a second run() while running is a no-op.
+ if self.running:
+ return
+ config_path = os.path.join(self.working_path, "zoo.cfg")
+ log_path = os.path.join(self.working_path, "log")
+ log4j_path = os.path.join(self.working_path, "log4j.properties")
+ data_path = os.path.join(self.working_path, "data")
+
+ # various setup steps
+ if not os.path.exists(self.working_path):
+ os.mkdir(self.working_path)
+ if not os.path.exists(log_path):
+ os.mkdir(log_path)
+ if not os.path.exists(data_path):
+ os.mkdir(data_path)
+
+ with open(config_path, "w") as config:
+ config.write("""
+tickTime=2000
+dataDir=%s
+clientPort=%s
+maxClientCnxns=0
+""" % (to_java_compatible_path(data_path), self.server_info.client_port))
+
+ # setup a replicated setup if peers are specified
+ if self.peers:
+ servers_cfg = []
+ for p in chain((self.server_info,), self.peers):
+ servers_cfg.append("server.%s=localhost:%s:%s" % (
+ p.server_id, p.leader_port, p.election_port))
+
+ with open(config_path, "a") as config:
+ config.write("""
+initLimit=4
+syncLimit=2
+%s
+""" % ("\n".join(servers_cfg)))
+
+ # Write server ids into datadir
+ with open(os.path.join(data_path, "myid"), "w") as myid_file:
+ myid_file.write(str(self.server_info.server_id))
+
+ with open(log4j_path, "w") as log4j:
+ log4j.write("""
+# DEFAULT: console appender only
+log4j.rootLogger=INFO, ROLLINGFILE
+log4j.appender.ROLLINGFILE.layout=org.apache.log4j.PatternLayout
+log4j.appender.ROLLINGFILE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n
+log4j.appender.ROLLINGFILE=org.apache.log4j.RollingFileAppender
+log4j.appender.ROLLINGFILE.Threshold=DEBUG
+log4j.appender.ROLLINGFILE.File=""" + to_java_compatible_path(
+ self.working_path + os.sep + "zookeeper.log\n"))
+
+ # Launch the JVM; the process handle is kept for stop()/wait().
+ self.process = subprocess.Popen(
+ args=["java",
+ "-cp", self.classpath,
+ "-Dreadonlymode.enabled=true",
+ "-Dzookeeper.log.dir=%s" % log_path,
+ "-Dzookeeper.root.logger=INFO,CONSOLE",
+ "-Dlog4j.configuration=file:%s" % log4j_path,
+ # "-Dlog4j.debug",
+ "org.apache.zookeeper.server.quorum.QuorumPeerMain",
+ config_path])
+ self._running = True
+
+ @property
+ def classpath(self):
+ """Get the classpath necessary to run ZooKeeper."""
+
+ # An explicitly supplied classpath always wins.
+ if self._classpath:
+ return self._classpath
+
+ # Two possibilities, as seen in zkEnv.sh:
+ # Check for a release - top-level zookeeper-*.jar?
+ jars = glob((os.path.join(
+ self.install_path, 'zookeeper-*.jar')))
+ if jars:
+ # Release build (`ant package`)
+ jars.extend(glob(os.path.join(
+ self.install_path,
+ "lib/*.jar")))
+ # support for different file locations on Debian/Ubuntu
+ jars.extend(glob(os.path.join(
+ self.install_path,
+ "log4j-*.jar")))
+ jars.extend(glob(os.path.join(
+ self.install_path,
+ "slf4j-api-*.jar")))
+ jars.extend(glob(os.path.join(
+ self.install_path,
+ "slf4j-log4j-*.jar")))
+ else:
+ # Development build (plain `ant`)
+ jars = glob((os.path.join(
+ self.install_path, 'build/zookeeper-*.jar')))
+ jars.extend(glob(os.path.join(
+ self.install_path,
+ "build/lib/*.jar")))
+
+ return os.pathsep.join(jars)
+
+ @property
+ def address(self):
+ """Get the address of the ZooKeeper instance."""
+ return "%s:%s" % (self.host, self.client_port)
+
+ @property
+ def running(self):
+ # True between run() and stop()/destroy().
+ return self._running
+
+ @property
+ def client_port(self):
+ return self.server_info.client_port
+
+ def reset(self):
+ """Stop the zookeeper instance, cleaning out its on disk-data."""
+ self.stop()
+ shutil.rmtree(os.path.join(self.working_path, "data"))
+ os.mkdir(os.path.join(self.working_path, "data"))
+ # Recreate myid so the server can rejoin the quorum after reset.
+ with open(os.path.join(self.working_path, "data", "myid"), "w") as fh:
+ fh.write(str(self.server_info.server_id))
+
+ def stop(self):
+ """Stop the Zookeeper instance, retaining on disk state."""
+ if not self.running:
+ return
+ self.process.terminate()
+ self.process.wait()
+ self._running = False
+
+ def destroy(self):
+ """Stop the ZooKeeper instance and destroy its on disk-state"""
+ # called by at exit handler, reimport to avoid cleanup race.
+ import shutil
+ self.stop()
+
+ shutil.rmtree(self.working_path)
+
+
+class ZookeeperCluster(object):
+ # Manages a local quorum of ManagedZooKeeper servers for tests.
+
+ def __init__(self, install_path=None, classpath=None, size=3, port_offset=20000):
+ self._install_path = install_path
+ self._classpath = classpath
+ self._servers = []
+
+ # Calculate ports and peer group
+ # Each server gets a block of 10 ports: client, election, leader.
+ port = port_offset
+ peers = []
+
+ for i in range(size):
+ info = ServerInfo(i + 1, port, port + 1, port + 2)
+ peers.append(info)
+ port += 10
+
+ # Instantiate Managed ZK Servers
+ # Each server receives the peer list minus itself.
+ for i in range(size):
+ server_peers = list(peers)
+ server_info = server_peers.pop(i)
+ self._servers.append(
+ ManagedZooKeeper(
+ self._install_path, server_info, server_peers, classpath=self._classpath))
+
+ def __getitem__(self, k):
+ return self._servers[k]
+
+ def __iter__(self):
+ return iter(self._servers)
+
+ def start(self):
+ # Zookeeper client expresses a preference for either lower ports or
+ # lexicographical ordering of hosts, to ensure that all servers have a
+ # chance to startup, start them in reverse order.
+ for server in reversed(list(self)):
+ server.run()
+ # Giving the servers a moment to start, decreases the overall time
+ # required for a client to successfully connect (2s vs. 4s without
+ # the sleep).
+ import time
+ time.sleep(2)
+
+ def stop(self):
+ # Stops all servers and forgets them; state on disk is retained.
+ for server in self:
+ server.stop()
+ self._servers = []
+
+ def terminate(self):
+ # Stops all servers and removes their working directories.
+ for server in self:
+ server.destroy()
+
+ def reset(self):
+ for server in self:
+ server.reset()
diff --git a/slider-agent/src/main/python/kazoo/testing/harness.py b/slider-agent/src/main/python/kazoo/testing/harness.py
new file mode 100644
index 0000000..93cc744
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/testing/harness.py
@@ -0,0 +1,181 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+"""Kazoo testing harnesses"""
+import atexit
+import logging
+import os
+import uuid
+import threading
+import unittest
+
+from kazoo.client import KazooClient
+from kazoo.exceptions import NotEmptyError
+from kazoo.protocol.states import (
+ KazooState
+)
+from kazoo.testing.common import ZookeeperCluster
+from kazoo.protocol.connection import _SESSION_EXPIRED
+
+log = logging.getLogger(__name__)
+
+CLUSTER = None
+
+
+def get_global_cluster():
+ # Lazily build one process-wide ZookeeperCluster from environment
+ # configuration; torn down via atexit at interpreter shutdown.
+ global CLUSTER
+ if CLUSTER is None:
+ ZK_HOME = os.environ.get("ZOOKEEPER_PATH")
+ ZK_CLASSPATH = os.environ.get("ZOOKEEPER_CLASSPATH")
+ ZK_PORT_OFFSET = int(os.environ.get("ZOOKEEPER_PORT_OFFSET", 20000))
+
+ assert ZK_HOME or ZK_CLASSPATH, (
+ "either ZOOKEEPER_PATH or ZOOKEEPER_CLASSPATH environment variable "
+ "must be defined.\n"
+ "For deb package installations this is /usr/share/java")
+
+ CLUSTER = ZookeeperCluster(
+ install_path=ZK_HOME,
+ classpath=ZK_CLASSPATH,
+ port_offset=ZK_PORT_OFFSET,
+ )
+ atexit.register(lambda cluster: cluster.terminate(), CLUSTER)
+ return CLUSTER
+
+
+class KazooTestHarness(unittest.TestCase):
+ """Harness for testing code that uses Kazoo
+
+ This object can be used directly or as a mixin. It supports starting
+ and stopping a complete ZooKeeper cluster locally and provides an
+ API for simulating errors and expiring sessions.
+
+ Example::
+
+ class MyTestCase(KazooTestHarness):
+ def setUp(self):
+ self.setup_zookeeper()
+
+ # additional test setup
+
+ def tearDown(self):
+ self.teardown_zookeeper()
+
+ def test_something(self):
+ something_that_needs_a_kazoo_client(self.client)
+
+ def test_something_else(self):
+ something_that_needs_zk_servers(self.servers)
+
+ """
+
+ def __init__(self, *args, **kw):
+ super(KazooTestHarness, self).__init__(*args, **kw)
+ self.client = None
+ self._clients = []
+
+ @property
+ def cluster(self):
+ return get_global_cluster()
+
+ @property
+ def servers(self):
+ return ",".join([s.address for s in self.cluster])
+
+ def _get_nonchroot_client(self):
+ return KazooClient(self.servers)
+
+ def _get_client(self, **kwargs):
+ c = KazooClient(self.hosts, **kwargs)
+ try:
+ self._clients.append(c)
+ except AttributeError:
+ self._client = [c]
+ return c
+
+ def expire_session(self, client_id=None):
+ """Force ZK to expire a client session
+
+ :param client_id: id of client to expire. If unspecified, the id of
+ self.client will be used.
+
+ """
+ client_id = client_id or self.client.client_id
+
+ lost = threading.Event()
+ safe = threading.Event()
+
+ def watch_loss(state):
+ if state == KazooState.LOST:
+ lost.set()
+ if lost.is_set() and state == KazooState.CONNECTED:
+ safe.set()
+ return True
+
+ self.client.add_listener(watch_loss)
+
+ self.client._call(_SESSION_EXPIRED, None)
+
+ lost.wait(5)
+ if not lost.isSet():
+ raise Exception("Failed to get notified of session loss")
+
+ # Wait for the reconnect now
+ safe.wait(15)
+ if not safe.isSet():
+ raise Exception("Failed to see client reconnect")
+ self.client.retry(self.client.get_async, '/')
+
+ def setup_zookeeper(self, **client_options):
+ """Create a ZK cluster and chrooted :class:`KazooClient`
+
+ The cluster will only be created on the first invocation and won't be
+ fully torn down until exit.
+ """
+ if not self.cluster[0].running:
+ self.cluster.start()
+ namespace = "/kazootests" + uuid.uuid4().hex
+ self.hosts = self.servers + namespace
+
+ if 'timeout' not in client_options:
+ client_options['timeout'] = 0.8
+ self.client = self._get_client(**client_options)
+ self.client.start()
+ self.client.ensure_path("/")
+
+ def teardown_zookeeper(self):
+ """Clean up any ZNodes created during the test
+ """
+ if not self.cluster[0].running:
+ self.cluster.start()
+
+ tries = 0
+ if self.client and self.client.connected:
+ while tries < 3:
+ try:
+ self.client.retry(self.client.delete, '/', recursive=True)
+ break
+ except NotEmptyError:
+ pass
+ tries += 1
+ self.client.stop()
+ self.client.close()
+ del self.client
+ else:
+ client = self._get_client()
+ client.start()
+ client.retry(client.delete, '/', recursive=True)
+ client.stop()
+ client.close()
+ del client
+
+ for client in self._clients:
+ client.stop()
+ del client
+ self._clients = None
+
+
+class KazooTestCase(KazooTestHarness):
+ # Convenience base class: wires the harness into unittest's
+ # setUp/tearDown so subclasses get a cluster and client for free.
+ def setUp(self):
+ self.setup_zookeeper()
+
+ def tearDown(self):
+ self.teardown_zookeeper()
diff --git a/slider-agent/src/main/python/kazoo/tests/__init__.py b/slider-agent/src/main/python/kazoo/tests/__init__.py
new file mode 100644
index 0000000..901253b
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/tests/__init__.py
@@ -0,0 +1 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
diff --git a/slider-agent/src/main/python/kazoo/tests/test_barrier.py b/slider-agent/src/main/python/kazoo/tests/test_barrier.py
new file mode 100644
index 0000000..461664f
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/tests/test_barrier.py
@@ -0,0 +1,158 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+import threading
+
+from nose.tools import eq_
+
+from kazoo.testing import KazooTestCase
+
+
+class KazooBarrierTests(KazooTestCase):
+ # Single-node Barrier recipe behavior.
+ def test_barrier_not_exist(self):
+ # wait() on a nonexistent barrier node returns immediately.
+ b = self.client.Barrier("/some/path")
+ eq_(b.wait(), True)
+
+ def test_barrier_exists(self):
+ # A created barrier blocks (wait(0) times out) until removed.
+ b = self.client.Barrier("/some/path")
+ b.create()
+ eq_(b.wait(0), False)
+ b.remove()
+ eq_(b.wait(), True)
+
+ def test_remove_nonexistent_barrier(self):
+ b = self.client.Barrier("/some/path")
+ eq_(b.remove(), False)
+
+
+class KazooDoubleBarrierTests(KazooTestCase):
+ # DoubleBarrier recipe: all participants enter together and leave
+ # together; threads coordinate via plain threading.Events.
+
+ def test_basic_barrier(self):
+ b = self.client.DoubleBarrier("/some/path", 1)
+ eq_(b.participating, False)
+ b.enter()
+ eq_(b.participating, True)
+ b.leave()
+ eq_(b.participating, False)
+
+ def test_two_barrier(self):
+ av = threading.Event()
+ ev = threading.Event()
+ bv = threading.Event()
+ release_all = threading.Event()
+ b1 = self.client.DoubleBarrier("/some/path", 2)
+ b2 = self.client.DoubleBarrier("/some/path", 2)
+
+ def make_barrier_one():
+ b1.enter()
+ ev.set()
+ release_all.wait()
+ b1.leave()
+ ev.set()
+
+ def make_barrier_two():
+ bv.wait()
+ b2.enter()
+ av.set()
+ release_all.wait()
+ b2.leave()
+ av.set()
+
+ # Spin up both of them
+ t1 = threading.Thread(target=make_barrier_one)
+ t1.start()
+ t2 = threading.Thread(target=make_barrier_two)
+ t2.start()
+
+ eq_(b1.participating, False)
+ eq_(b2.participating, False)
+
+ bv.set()
+ av.wait()
+ ev.wait()
+ eq_(b1.participating, True)
+ eq_(b2.participating, True)
+
+ av.clear()
+ ev.clear()
+
+ release_all.set()
+ av.wait()
+ ev.wait()
+ eq_(b1.participating, False)
+ eq_(b2.participating, False)
+ t1.join()
+ t2.join()
+
+ def test_three_barrier(self):
+ av = threading.Event()
+ ev = threading.Event()
+ bv = threading.Event()
+ release_all = threading.Event()
+ b1 = self.client.DoubleBarrier("/some/path", 3)
+ b2 = self.client.DoubleBarrier("/some/path", 3)
+ b3 = self.client.DoubleBarrier("/some/path", 3)
+
+ def make_barrier_one():
+ b1.enter()
+ ev.set()
+ release_all.wait()
+ b1.leave()
+ ev.set()
+
+ def make_barrier_two():
+ bv.wait()
+ b2.enter()
+ av.set()
+ release_all.wait()
+ b2.leave()
+ av.set()
+
+ # Spin up the first two; the third enters on the main thread below
+ t1 = threading.Thread(target=make_barrier_one)
+ t1.start()
+ t2 = threading.Thread(target=make_barrier_two)
+ t2.start()
+
+ eq_(b1.participating, False)
+ eq_(b2.participating, False)
+
+ bv.set()
+ eq_(b1.participating, False)
+ eq_(b2.participating, False)
+ b3.enter()
+ ev.wait()
+ av.wait()
+
+ eq_(b1.participating, True)
+ eq_(b2.participating, True)
+ eq_(b3.participating, True)
+
+ av.clear()
+ ev.clear()
+
+ release_all.set()
+ b3.leave()
+ av.wait()
+ ev.wait()
+ eq_(b1.participating, False)
+ eq_(b2.participating, False)
+ eq_(b3.participating, False)
+ t1.join()
+ t2.join()
+
+ def test_barrier_existing_parent_node(self):
+ b = self.client.DoubleBarrier('/some/path', 1)
+ self.assertFalse(b.participating)
+ self.client.create('/some', ephemeral=True)
+ # the barrier cannot create children under an ephemeral node
+ b.enter()
+ self.assertFalse(b.participating)
+
+ def test_barrier_existing_node(self):
+ b = self.client.DoubleBarrier('/some', 1)
+ self.assertFalse(b.participating)
+ self.client.ensure_path(b.path)
+ self.client.create(b.create_path, ephemeral=True)
+ # the barrier will re-use an existing node
+ b.enter()
+ self.assertTrue(b.participating)
+ b.leave()
diff --git a/slider-agent/src/main/python/kazoo/tests/test_build.py b/slider-agent/src/main/python/kazoo/tests/test_build.py
new file mode 100644
index 0000000..0f75d7c
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/tests/test_build.py
@@ -0,0 +1,30 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+import os
+
+from nose import SkipTest
+
+from kazoo.testing import KazooTestCase
+
+
+class TestBuildEnvironment(KazooTestCase):
+
+ def setUp(self):
+ KazooTestCase.setUp(self)
+ if not os.environ.get('TRAVIS'):
+ raise SkipTest('Only run build config tests on Travis.')
+
+ def test_gevent_version(self):
+ try:
+ import gevent
+ except ImportError:
+ raise SkipTest('gevent not available.')
+ env_version = os.environ.get('GEVENT_VERSION')
+ if env_version:
+ self.assertEqual(env_version, gevent.__version__)
+
+ def test_zookeeper_version(self):
+ server_version = self.client.server_version()
+ server_version = '.'.join([str(i) for i in server_version])
+ env_version = os.environ.get('ZOOKEEPER_VERSION')
+ if env_version:
+ self.assertEqual(env_version, server_version)
diff --git a/slider-agent/src/main/python/kazoo/tests/test_client.py b/slider-agent/src/main/python/kazoo/tests/test_client.py
new file mode 100644
index 0000000..eb19ef5
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/tests/test_client.py
@@ -0,0 +1,1099 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+import socket
+import sys
+import threading
+import time
+import uuid
+import unittest
+
+from mock import patch
+from nose import SkipTest
+from nose.tools import eq_
+from nose.tools import raises
+
+from kazoo.testing import KazooTestCase
+from kazoo.exceptions import (
+ AuthFailedError,
+ BadArgumentsError,
+ ConfigurationError,
+ ConnectionClosedError,
+ ConnectionLoss,
+ InvalidACLError,
+ NoAuthError,
+ NoNodeError,
+ NodeExistsError,
+ SessionExpiredError,
+)
+from kazoo.protocol.connection import _CONNECTION_DROP
+from kazoo.protocol.states import KeeperState, KazooState
+from kazoo.tests.util import TRAVIS_ZK_VERSION
+
+
+if sys.version_info > (3, ): # pragma: nocover
+ def u(s):
+ return s
+else: # pragma: nocover
+ def u(s):
+ return unicode(s, "unicode_escape")
+
+
+class TestClientTransitions(KazooTestCase):
+ def test_connection_and_disconnection(self):
+ states = []
+ rc = threading.Event()
+
+ @self.client.add_listener
+ def listener(state):
+ states.append(state)
+ if state == KazooState.CONNECTED:
+ rc.set()
+
+ self.client.stop()
+ eq_(states, [KazooState.LOST])
+ states.pop()
+
+ self.client.start()
+ rc.wait(2)
+ eq_(states, [KazooState.CONNECTED])
+ rc.clear()
+ states.pop()
+ self.expire_session()
+ rc.wait(2)
+
+ req_states = [KazooState.LOST, KazooState.CONNECTED]
+ eq_(states, req_states)
+
+
+class TestClientConstructor(unittest.TestCase):
+
+ def _makeOne(self, *args, **kw):
+ from kazoo.client import KazooClient
+ return KazooClient(*args, **kw)
+
+ def test_invalid_handler(self):
+ from kazoo.handlers.threading import SequentialThreadingHandler
+ self.assertRaises(ConfigurationError,
+ self._makeOne, handler=SequentialThreadingHandler)
+
+ def test_chroot(self):
+ self.assertEqual(self._makeOne(hosts='127.0.0.1:2181/').chroot, '')
+ self.assertEqual(self._makeOne(hosts='127.0.0.1:2181/a').chroot, '/a')
+ self.assertEqual(self._makeOne(hosts='127.0.0.1/a').chroot, '/a')
+ self.assertEqual(self._makeOne(hosts='127.0.0.1/a/b').chroot, '/a/b')
+ self.assertEqual(self._makeOne(
+ hosts='127.0.0.1:2181,127.0.0.1:2182/a/b').chroot, '/a/b')
+
+ def test_connection_timeout(self):
+ from kazoo.handlers.threading import TimeoutError
+ client = self._makeOne(hosts='127.0.0.1:9')
+ self.assertTrue(client.handler.timeout_exception is TimeoutError)
+ self.assertRaises(TimeoutError, client.start, 0.1)
+
+ def test_ordered_host_selection(self):
+ client = self._makeOne(hosts='127.0.0.1:9,127.0.0.2:9/a',
+ randomize_hosts=False)
+ hosts = [h for h in client.hosts]
+ eq_(hosts, [('127.0.0.1', 9), ('127.0.0.2', 9)])
+
+ def test_invalid_hostname(self):
+ client = self._makeOne(hosts='nosuchhost/a')
+ timeout = client.handler.timeout_exception
+ self.assertRaises(timeout, client.start, 0.1)
+
+ def test_retry_options_dict(self):
+ from kazoo.retry import KazooRetry
+ client = self._makeOne(command_retry=dict(max_tries=99),
+ connection_retry=dict(delay=99))
+ self.assertTrue(type(client._conn_retry) is KazooRetry)
+ self.assertTrue(type(client._retry) is KazooRetry)
+ eq_(client._retry.max_tries, 99)
+ eq_(client._conn_retry.delay, 99)
+
+
+class TestAuthentication(KazooTestCase):
+
+ def _makeAuth(self, *args, **kwargs):
+ from kazoo.security import make_digest_acl
+ return make_digest_acl(*args, **kwargs)
+
+ def test_auth(self):
+ username = uuid.uuid4().hex
+ password = uuid.uuid4().hex
+
+ digest_auth = "%s:%s" % (username, password)
+ acl = self._makeAuth(username, password, all=True)
+
+ client = self._get_client()
+ client.start()
+ client.add_auth("digest", digest_auth)
+ client.default_acl = (acl,)
+
+ try:
+ client.create("/1")
+ client.create("/1/2")
+ client.ensure_path("/1/2/3")
+
+ eve = self._get_client()
+ eve.start()
+
+ self.assertRaises(NoAuthError, eve.get, "/1/2")
+
+ # try again with the wrong auth token
+ eve.add_auth("digest", "badbad:bad")
+
+ self.assertRaises(NoAuthError, eve.get, "/1/2")
+ finally:
+ # Ensure we remove the ACL protected nodes
+ client.delete("/1", recursive=True)
+ eve.stop()
+ eve.close()
+
+ def test_connect_auth(self):
+ username = uuid.uuid4().hex
+ password = uuid.uuid4().hex
+
+ digest_auth = "%s:%s" % (username, password)
+ acl = self._makeAuth(username, password, all=True)
+
+ client = self._get_client(auth_data=[('digest', digest_auth)])
+ client.start()
+ try:
+ client.create('/1', acl=(acl,))
+ # give ZK a chance to copy data to other node
+ time.sleep(0.1)
+ self.assertRaises(NoAuthError, self.client.get, "/1")
+ finally:
+ client.delete('/1')
+ client.stop()
+ client.close()
+
+ def test_unicode_auth(self):
+ username = u("xe4/\hm")
+ password = u("/\xe4hm")
+ digest_auth = "%s:%s" % (username, password)
+ acl = self._makeAuth(username, password, all=True)
+
+ client = self._get_client()
+ client.start()
+ client.add_auth("digest", digest_auth)
+ client.default_acl = (acl,)
+
+ try:
+ client.create("/1")
+ client.ensure_path("/1/2/3")
+
+ eve = self._get_client()
+ eve.start()
+
+ self.assertRaises(NoAuthError, eve.get, "/1/2")
+
+ # try again with the wrong auth token
+ eve.add_auth("digest", "badbad:bad")
+
+ self.assertRaises(NoAuthError, eve.get, "/1/2")
+ finally:
+ # Ensure we remove the ACL protected nodes
+ client.delete("/1", recursive=True)
+ eve.stop()
+ eve.close()
+
+ def test_invalid_auth(self):
+ client = self._get_client()
+ client.start()
+ self.assertRaises(TypeError, client.add_auth,
+ 'digest', ('user', 'pass'))
+ self.assertRaises(TypeError, client.add_auth,
+ None, ('user', 'pass'))
+
+ def test_async_auth(self):
+ client = self._get_client()
+ client.start()
+ username = uuid.uuid4().hex
+ password = uuid.uuid4().hex
+ digest_auth = "%s:%s" % (username, password)
+ result = client.add_auth_async("digest", digest_auth)
+ self.assertTrue(result.get())
+
+ def test_async_auth_failure(self):
+ client = self._get_client()
+ client.start()
+ username = uuid.uuid4().hex
+ password = uuid.uuid4().hex
+ digest_auth = "%s:%s" % (username, password)
+
+ self.assertRaises(AuthFailedError, client.add_auth,
+ "unknown-scheme", digest_auth)
+
+ def test_add_auth_on_reconnect(self):
+ client = self._get_client()
+ client.start()
+ client.add_auth("digest", "jsmith:jsmith")
+ client._connection._socket.shutdown(socket.SHUT_RDWR)
+ while not client.connected:
+ time.sleep(0.1)
+ self.assertTrue(("digest", "jsmith:jsmith") in client.auth_data)
+
+
+class TestConnection(KazooTestCase):
+
+ def test_chroot_warning(self):
+ k = self._get_nonchroot_client()
+ k.chroot = 'abba'
+ try:
+ with patch('warnings.warn') as mock_func:
+ k.start()
+ assert mock_func.called
+ finally:
+ k.stop()
+
+ def test_session_expire(self):
+ from kazoo.protocol.states import KazooState
+
+ cv = threading.Event()
+
+ def watch_events(event):
+ if event == KazooState.LOST:
+ cv.set()
+
+ self.client.add_listener(watch_events)
+ self.expire_session()
+ cv.wait(3)
+ assert cv.is_set()
+
+ def test_bad_session_expire(self):
+ from kazoo.protocol.states import KazooState
+
+ cv = threading.Event()
+ ab = threading.Event()
+
+ def watch_events(event):
+ if event == KazooState.LOST:
+ ab.set()
+ raise Exception("oops")
+ cv.set()
+
+ self.client.add_listener(watch_events)
+ self.expire_session()
+ ab.wait(0.5)
+ assert ab.is_set()
+ cv.wait(0.5)
+ assert not cv.is_set()
+
+ def test_state_listener(self):
+ from kazoo.protocol.states import KazooState
+ states = []
+ condition = threading.Condition()
+
+ def listener(state):
+ with condition:
+ states.append(state)
+ condition.notify_all()
+
+ self.client.stop()
+ eq_(self.client.state, KazooState.LOST)
+ self.client.add_listener(listener)
+ self.client.start(5)
+
+ with condition:
+ if not states:
+ condition.wait(5)
+
+ eq_(len(states), 1)
+ eq_(states[0], KazooState.CONNECTED)
+
+ def test_invalid_listener(self):
+ self.assertRaises(ConfigurationError, self.client.add_listener, 15)
+
+ def test_listener_only_called_on_real_state_change(self):
+ from kazoo.protocol.states import KazooState
+ self.assertTrue(self.client.state, KazooState.CONNECTED)
+ called = [False]
+ condition = threading.Event()
+
+ def listener(state):
+ called[0] = True
+ condition.set()
+
+ self.client.add_listener(listener)
+ self.client._make_state_change(KazooState.CONNECTED)
+ condition.wait(3)
+ self.assertFalse(called[0])
+
+ def test_no_connection(self):
+ client = self.client
+ client.stop()
+ self.assertFalse(client.connected)
+ self.assertTrue(client.client_id is None)
+ self.assertRaises(ConnectionClosedError, client.exists, '/')
+
+ def test_close_connecting_connection(self):
+ client = self.client
+ client.stop()
+ ev = threading.Event()
+
+ def close_on_connecting(state):
+ if state in (KazooState.CONNECTED, KazooState.LOST):
+ ev.set()
+
+ client.add_listener(close_on_connecting)
+ client.start()
+
+ # Wait until we connect
+ ev.wait(5)
+ ev.clear()
+ self.client._call(_CONNECTION_DROP, client.handler.async_result())
+
+ client.stop()
+
+ # ...and then wait until the connection is lost
+ ev.wait(5)
+
+ self.assertRaises(ConnectionClosedError,
+ self.client.create, '/foobar')
+
+ def test_double_start(self):
+ self.assertTrue(self.client.connected)
+ self.client.start()
+ self.assertTrue(self.client.connected)
+
+ def test_double_stop(self):
+ self.client.stop()
+ self.assertFalse(self.client.connected)
+ self.client.stop()
+ self.assertFalse(self.client.connected)
+
+ def test_restart(self):
+ self.assertTrue(self.client.connected)
+ self.client.restart()
+ self.assertTrue(self.client.connected)
+
+ def test_closed(self):
+ client = self.client
+ client.stop()
+
+ write_pipe = client._connection._write_pipe
+
+ # close the connection to free the pipe
+ client.close()
+ eq_(client._connection._write_pipe, None)
+
+ # sneak in and patch client to simulate race between a thread
+ # calling stop(); close() and one running a command
+ oldstate = client._state
+ client._state = KeeperState.CONNECTED
+ client._connection._write_pipe = write_pipe
+ try:
+ # simulate call made after write pipe is closed
+ self.assertRaises(ConnectionClosedError, client.exists, '/')
+
+            # simulate call made after write pipe is set to None
+ client._connection._write_pipe = None
+ self.assertRaises(ConnectionClosedError, client.exists, '/')
+
+ finally:
+ # reset for teardown
+ client._state = oldstate
+ client._connection._write_pipe = None
+
+
+class TestClient(KazooTestCase):
+ def _getKazooState(self):
+ from kazoo.protocol.states import KazooState
+ return KazooState
+
+ def test_client_id(self):
+ client_id = self.client.client_id
+ self.assertEqual(type(client_id), tuple)
+ # make sure password is of correct length
+ self.assertEqual(len(client_id[1]), 16)
+
+ def test_connected(self):
+ client = self.client
+ self.assertTrue(client.connected)
+
+ def test_create(self):
+ client = self.client
+ path = client.create("/1")
+ eq_(path, "/1")
+ self.assertTrue(client.exists("/1"))
+
+ def test_create_on_broken_connection(self):
+ client = self.client
+ client.start()
+
+ client._state = KeeperState.EXPIRED_SESSION
+ self.assertRaises(SessionExpiredError, client.create,
+ '/closedpath', b'bar')
+
+ client._state = KeeperState.AUTH_FAILED
+ self.assertRaises(AuthFailedError, client.create,
+ '/closedpath', b'bar')
+
+ client._state = KeeperState.CONNECTING
+ self.assertRaises(SessionExpiredError, client.create,
+ '/closedpath', b'bar')
+ client.stop()
+ client.close()
+
+ self.assertRaises(ConnectionClosedError, client.create,
+ '/closedpath', b'bar')
+
+ def test_create_null_data(self):
+ client = self.client
+ client.create("/nulldata", None)
+ value, _ = client.get("/nulldata")
+ self.assertEqual(value, None)
+
+ def test_create_empty_string(self):
+ client = self.client
+ client.create("/empty", b"")
+ value, _ = client.get("/empty")
+ eq_(value, b"")
+
+ def test_create_unicode_path(self):
+ client = self.client
+ path = client.create(u("/ascii"))
+ eq_(path, u("/ascii"))
+ path = client.create(u("/\xe4hm"))
+ eq_(path, u("/\xe4hm"))
+
+ def test_create_async_returns_unchrooted_path(self):
+ client = self.client
+ path = client.create_async('/1').get()
+ eq_(path, "/1")
+
+ def test_create_invalid_path(self):
+ client = self.client
+ self.assertRaises(TypeError, client.create, ('a', ))
+ self.assertRaises(ValueError, client.create, ".")
+ self.assertRaises(ValueError, client.create, "/a/../b")
+ self.assertRaises(BadArgumentsError, client.create, "/b\x00")
+ self.assertRaises(BadArgumentsError, client.create, "/b\x1e")
+
+ def test_create_invalid_arguments(self):
+ from kazoo.security import OPEN_ACL_UNSAFE
+ single_acl = OPEN_ACL_UNSAFE[0]
+ client = self.client
+ self.assertRaises(TypeError, client.create, 'a', acl='all')
+ self.assertRaises(TypeError, client.create, 'a', acl=single_acl)
+ self.assertRaises(TypeError, client.create, 'a', value=['a'])
+ self.assertRaises(TypeError, client.create, 'a', ephemeral='yes')
+ self.assertRaises(TypeError, client.create, 'a', sequence='yes')
+ self.assertRaises(TypeError, client.create, 'a', makepath='yes')
+
+ def test_create_value(self):
+ client = self.client
+ client.create("/1", b"bytes")
+ data, stat = client.get("/1")
+ eq_(data, b"bytes")
+
+ def test_create_unicode_value(self):
+ client = self.client
+ self.assertRaises(TypeError, client.create, "/1", u("\xe4hm"))
+
+ def test_create_large_value(self):
+ client = self.client
+ kb_512 = b"a" * (512 * 1024)
+ client.create("/1", kb_512)
+ self.assertTrue(client.exists("/1"))
+ mb_2 = b"a" * (2 * 1024 * 1024)
+ self.assertRaises(ConnectionLoss, client.create, "/2", mb_2)
+
+ def test_create_acl_duplicate(self):
+ from kazoo.security import OPEN_ACL_UNSAFE
+ single_acl = OPEN_ACL_UNSAFE[0]
+ client = self.client
+ client.create("/1", acl=[single_acl, single_acl])
+ acls, stat = client.get_acls("/1")
+ # ZK >3.4 removes duplicate ACL entries
+ if TRAVIS_ZK_VERSION:
+ version = TRAVIS_ZK_VERSION
+ else:
+ version = client.server_version()
+ self.assertEqual(len(acls), 1 if version > (3, 4) else 2)
+
+ def test_create_acl_empty_list(self):
+ from kazoo.security import OPEN_ACL_UNSAFE
+ client = self.client
+ client.create("/1", acl=[])
+ acls, stat = client.get_acls("/1")
+ self.assertEqual(acls, OPEN_ACL_UNSAFE)
+
+ def test_version_no_connection(self):
+ @raises(ConnectionLoss)
+ def testit():
+ self.client.server_version()
+ self.client.stop()
+ testit()
+
+ def test_create_ephemeral(self):
+ client = self.client
+ client.create("/1", b"ephemeral", ephemeral=True)
+ data, stat = client.get("/1")
+ eq_(data, b"ephemeral")
+ eq_(stat.ephemeralOwner, client.client_id[0])
+
+ def test_create_no_ephemeral(self):
+ client = self.client
+ client.create("/1", b"val1")
+ data, stat = client.get("/1")
+ self.assertFalse(stat.ephemeralOwner)
+
+ def test_create_ephemeral_no_children(self):
+ from kazoo.exceptions import NoChildrenForEphemeralsError
+ client = self.client
+ client.create("/1", b"ephemeral", ephemeral=True)
+ self.assertRaises(NoChildrenForEphemeralsError,
+ client.create, "/1/2", b"val1")
+ self.assertRaises(NoChildrenForEphemeralsError,
+ client.create, "/1/2", b"val1", ephemeral=True)
+
+ def test_create_sequence(self):
+ client = self.client
+ client.create("/folder")
+ path = client.create("/folder/a", b"sequence", sequence=True)
+ eq_(path, "/folder/a0000000000")
+ path2 = client.create("/folder/a", b"sequence", sequence=True)
+ eq_(path2, "/folder/a0000000001")
+ path3 = client.create("/folder/", b"sequence", sequence=True)
+ eq_(path3, "/folder/0000000002")
+
+ def test_create_ephemeral_sequence(self):
+ basepath = "/" + uuid.uuid4().hex
+ realpath = self.client.create(basepath, b"sandwich", sequence=True,
+ ephemeral=True)
+ self.assertTrue(basepath != realpath and realpath.startswith(basepath))
+ data, stat = self.client.get(realpath)
+ eq_(data, b"sandwich")
+
+ def test_create_makepath(self):
+ self.client.create("/1/2", b"val1", makepath=True)
+ data, stat = self.client.get("/1/2")
+ eq_(data, b"val1")
+
+ self.client.create("/1/2/3/4/5", b"val2", makepath=True)
+ data, stat = self.client.get("/1/2/3/4/5")
+ eq_(data, b"val2")
+
+ self.assertRaises(NodeExistsError, self.client.create, "/1/2/3/4/5",
+ b"val2", makepath=True)
+
+ def test_create_makepath_incompatible_acls(self):
+ from kazoo.client import KazooClient
+ from kazoo.security import make_digest_acl_credential, CREATOR_ALL_ACL
+ credential = make_digest_acl_credential("username", "password")
+ alt_client = KazooClient(self.cluster[0].address + self.client.chroot,
+ max_retries=5, auth_data=[("digest", credential)])
+ alt_client.start()
+ alt_client.create("/1/2", b"val2", makepath=True, acl=CREATOR_ALL_ACL)
+
+ try:
+ self.assertRaises(NoAuthError, self.client.create, "/1/2/3/4/5",
+ b"val2", makepath=True)
+ finally:
+ alt_client.delete('/', recursive=True)
+ alt_client.stop()
+
+ def test_create_no_makepath(self):
+ self.assertRaises(NoNodeError, self.client.create, "/1/2", b"val1")
+ self.assertRaises(NoNodeError, self.client.create, "/1/2", b"val1",
+ makepath=False)
+
+ self.client.create("/1/2", b"val1", makepath=True)
+ self.assertRaises(NoNodeError, self.client.create, "/1/2/3/4", b"val1",
+ makepath=False)
+
+ def test_create_exists(self):
+ from kazoo.exceptions import NodeExistsError
+ client = self.client
+ path = client.create("/1")
+ self.assertRaises(NodeExistsError, client.create, path)
+
+ def test_create_get_set(self):
+ nodepath = "/" + uuid.uuid4().hex
+
+ self.client.create(nodepath, b"sandwich", ephemeral=True)
+
+ data, stat = self.client.get(nodepath)
+ eq_(data, b"sandwich")
+
+ newstat = self.client.set(nodepath, b"hats", stat.version)
+ self.assertTrue(newstat)
+ assert newstat.version > stat.version
+
+ # Some other checks of the ZnodeStat object we got
+ eq_(newstat.acl_version, stat.acl_version)
+ eq_(newstat.created, stat.ctime / 1000.0)
+ eq_(newstat.last_modified, newstat.mtime / 1000.0)
+ eq_(newstat.owner_session_id, stat.ephemeralOwner)
+ eq_(newstat.creation_transaction_id, stat.czxid)
+ eq_(newstat.last_modified_transaction_id, newstat.mzxid)
+ eq_(newstat.data_length, newstat.dataLength)
+ eq_(newstat.children_count, stat.numChildren)
+ eq_(newstat.children_version, stat.cversion)
+
+ def test_get_invalid_arguments(self):
+ client = self.client
+ self.assertRaises(TypeError, client.get, ('a', 'b'))
+ self.assertRaises(TypeError, client.get, 'a', watch=True)
+
+ def test_bad_argument(self):
+ client = self.client
+ client.ensure_path("/1")
+ self.assertRaises(TypeError, self.client.set, "/1", 1)
+
+ def test_ensure_path(self):
+ client = self.client
+ client.ensure_path("/1/2")
+ self.assertTrue(client.exists("/1/2"))
+
+ client.ensure_path("/1/2/3/4")
+ self.assertTrue(client.exists("/1/2/3/4"))
+
+ def test_sync(self):
+ client = self.client
+ self.assertTrue(client.sync('/'), '/')
+
+ def test_exists(self):
+ nodepath = "/" + uuid.uuid4().hex
+
+ exists = self.client.exists(nodepath)
+ eq_(exists, None)
+
+ self.client.create(nodepath, b"sandwich", ephemeral=True)
+ exists = self.client.exists(nodepath)
+ self.assertTrue(exists)
+ assert isinstance(exists.version, int)
+
+ multi_node_nonexistent = "/" + uuid.uuid4().hex + "/hats"
+ exists = self.client.exists(multi_node_nonexistent)
+ eq_(exists, None)
+
+ def test_exists_invalid_arguments(self):
+ client = self.client
+ self.assertRaises(TypeError, client.exists, ('a', 'b'))
+ self.assertRaises(TypeError, client.exists, 'a', watch=True)
+
+ def test_exists_watch(self):
+ nodepath = "/" + uuid.uuid4().hex
+ event = self.client.handler.event_object()
+
+ def w(watch_event):
+ eq_(watch_event.path, nodepath)
+ event.set()
+
+ exists = self.client.exists(nodepath, watch=w)
+ eq_(exists, None)
+
+ self.client.create(nodepath, ephemeral=True)
+
+ event.wait(1)
+ self.assertTrue(event.is_set())
+
+ def test_exists_watcher_exception(self):
+ nodepath = "/" + uuid.uuid4().hex
+ event = self.client.handler.event_object()
+
+ # if the watcher throws an exception, all we can really do is log it
+ def w(watch_event):
+ eq_(watch_event.path, nodepath)
+ event.set()
+
+ raise Exception("test exception in callback")
+
+ exists = self.client.exists(nodepath, watch=w)
+ eq_(exists, None)
+
+ self.client.create(nodepath, ephemeral=True)
+
+ event.wait(1)
+ self.assertTrue(event.is_set())
+
+ def test_create_delete(self):
+ nodepath = "/" + uuid.uuid4().hex
+
+ self.client.create(nodepath, b"zzz")
+
+ self.client.delete(nodepath)
+
+ exists = self.client.exists(nodepath)
+ eq_(exists, None)
+
+ def test_get_acls(self):
+ from kazoo.security import make_digest_acl
+ acl = make_digest_acl('user', 'pass', all=True)
+ client = self.client
+ try:
+ client.create('/a', acl=[acl])
+ self.assertTrue(acl in client.get_acls('/a')[0])
+ finally:
+ client.delete('/a')
+
+ def test_get_acls_invalid_arguments(self):
+ client = self.client
+ self.assertRaises(TypeError, client.get_acls, ('a', 'b'))
+
+ def test_set_acls(self):
+ from kazoo.security import make_digest_acl
+ acl = make_digest_acl('user', 'pass', all=True)
+ client = self.client
+ client.create('/a')
+ try:
+ client.set_acls('/a', [acl])
+ self.assertTrue(acl in client.get_acls('/a')[0])
+ finally:
+ client.delete('/a')
+
+ def test_set_acls_empty(self):
+ client = self.client
+ client.create('/a')
+ self.assertRaises(InvalidACLError, client.set_acls, '/a', [])
+
+ def test_set_acls_no_node(self):
+ from kazoo.security import OPEN_ACL_UNSAFE
+ client = self.client
+ self.assertRaises(NoNodeError, client.set_acls, '/a', OPEN_ACL_UNSAFE)
+
+ def test_set_acls_invalid_arguments(self):
+ from kazoo.security import OPEN_ACL_UNSAFE
+ single_acl = OPEN_ACL_UNSAFE[0]
+ client = self.client
+ self.assertRaises(TypeError, client.set_acls, ('a', 'b'), ())
+ self.assertRaises(TypeError, client.set_acls, 'a', single_acl)
+ self.assertRaises(TypeError, client.set_acls, 'a', 'all')
+ self.assertRaises(TypeError, client.set_acls, 'a', [single_acl], 'V1')
+
+ def test_set(self):
+ client = self.client
+ client.create('a', b'first')
+ stat = client.set('a', b'second')
+ data, stat2 = client.get('a')
+ self.assertEqual(data, b'second')
+ self.assertEqual(stat, stat2)
+
+ def test_set_null_data(self):
+ client = self.client
+ client.create("/nulldata", b"not none")
+ client.set("/nulldata", None)
+ value, _ = client.get("/nulldata")
+ self.assertEqual(value, None)
+
+ def test_set_empty_string(self):
+ client = self.client
+ client.create("/empty", b"not empty")
+ client.set("/empty", b"")
+ value, _ = client.get("/empty")
+ eq_(value, b"")
+
+ def test_set_invalid_arguments(self):
+ client = self.client
+ client.create('a', b'first')
+ self.assertRaises(TypeError, client.set, ('a', 'b'), b'value')
+ self.assertRaises(TypeError, client.set, 'a', ['v', 'w'])
+ self.assertRaises(TypeError, client.set, 'a', b'value', 'V1')
+
+ def test_delete(self):
+ client = self.client
+ client.ensure_path('/a/b')
+ self.assertTrue('b' in client.get_children('a'))
+ client.delete('/a/b')
+ self.assertFalse('b' in client.get_children('a'))
+
+ def test_delete_recursive(self):
+ client = self.client
+ client.ensure_path('/a/b/c')
+ client.ensure_path('/a/b/d')
+ client.delete('/a/b', recursive=True)
+ client.delete('/a/b/c', recursive=True)
+ self.assertFalse('b' in client.get_children('a'))
+
+ def test_delete_invalid_arguments(self):
+ client = self.client
+ client.ensure_path('/a/b')
+ self.assertRaises(TypeError, client.delete, '/a/b', recursive='all')
+ self.assertRaises(TypeError, client.delete, ('a', 'b'))
+ self.assertRaises(TypeError, client.delete, '/a/b', version='V1')
+
+ def test_get_children(self):
+ client = self.client
+ client.ensure_path('/a/b/c')
+ client.ensure_path('/a/b/d')
+ self.assertEqual(client.get_children('/a'), ['b'])
+ self.assertEqual(set(client.get_children('/a/b')), set(['c', 'd']))
+ self.assertEqual(client.get_children('/a/b/c'), [])
+
+ def test_get_children2(self):
+ client = self.client
+ client.ensure_path('/a/b')
+ children, stat = client.get_children('/a', include_data=True)
+ value, stat2 = client.get('/a')
+ self.assertEqual(children, ['b'])
+ self.assertEqual(stat2.version, stat.version)
+
+ def test_get_children2_many_nodes(self):
+ client = self.client
+ client.ensure_path('/a/b')
+ client.ensure_path('/a/c')
+ client.ensure_path('/a/d')
+ children, stat = client.get_children('/a', include_data=True)
+ value, stat2 = client.get('/a')
+ self.assertEqual(set(children), set(['b', 'c', 'd']))
+ self.assertEqual(stat2.version, stat.version)
+
+ def test_get_children_no_node(self):
+ client = self.client
+ self.assertRaises(NoNodeError, client.get_children, '/none')
+ self.assertRaises(NoNodeError, client.get_children,
+ '/none', include_data=True)
+
+ def test_get_children_invalid_path(self):
+ client = self.client
+ self.assertRaises(ValueError, client.get_children, '../a')
+
+ def test_get_children_invalid_arguments(self):
+ client = self.client
+ self.assertRaises(TypeError, client.get_children, ('a', 'b'))
+ self.assertRaises(TypeError, client.get_children, 'a', watch=True)
+ self.assertRaises(TypeError, client.get_children,
+ 'a', include_data='yes')
+
+ def test_invalid_auth(self):
+ from kazoo.exceptions import AuthFailedError
+ from kazoo.protocol.states import KeeperState
+
+ client = self.client
+ client.stop()
+ client._state = KeeperState.AUTH_FAILED
+
+ @raises(AuthFailedError)
+ def testit():
+ client.get('/')
+ testit()
+
+ def test_client_state(self):
+ from kazoo.protocol.states import KeeperState
+ eq_(self.client.client_state, KeeperState.CONNECTED)
+
+ def test_update_host_list(self):
+ from kazoo.client import KazooClient
+ from kazoo.protocol.states import KeeperState
+ hosts = self.cluster[0].address
+ # create a client with only one server in its list
+ client = KazooClient(hosts=hosts)
+ client.start()
+
+ # try to change the chroot, not currently allowed
+ self.assertRaises(ConfigurationError,
+ client.set_hosts, hosts + '/new_chroot')
+
+ # grow the cluster to 3
+ client.set_hosts(self.servers)
+
+ # shut down the first host
+ try:
+ self.cluster[0].stop()
+ time.sleep(5)
+ eq_(client.client_state, KeeperState.CONNECTED)
+ finally:
+ self.cluster[0].run()
+
+
+dummy_dict = {
+ 'aversion': 1, 'ctime': 0, 'cversion': 1,
+ 'czxid': 110, 'dataLength': 1, 'ephemeralOwner': 'ben',
+ 'mtime': 1, 'mzxid': 1, 'numChildren': 0, 'pzxid': 1, 'version': 1
+}
+
+
+class TestClientTransactions(KazooTestCase):
+
+ def setUp(self):
+ KazooTestCase.setUp(self)
+ skip = False
+ if TRAVIS_ZK_VERSION and TRAVIS_ZK_VERSION < (3, 4):
+ skip = True
+ elif TRAVIS_ZK_VERSION and TRAVIS_ZK_VERSION >= (3, 4):
+ skip = False
+ else:
+ ver = self.client.server_version()
+ if ver[1] < 4:
+ skip = True
+ if skip:
+ raise SkipTest("Must use Zookeeper 3.4 or above")
+
+ def test_basic_create(self):
+ t = self.client.transaction()
+ t.create('/freddy')
+ t.create('/fred', ephemeral=True)
+ t.create('/smith', sequence=True)
+ results = t.commit()
+ eq_(results[0], '/freddy')
+ eq_(len(results), 3)
+ self.assertTrue(results[2].startswith('/smith0'))
+
+ def test_bad_creates(self):
+ args_list = [(True,), ('/smith', 0), ('/smith', b'', 'bleh'),
+ ('/smith', b'', None, 'fred'),
+ ('/smith', b'', None, True, 'fred')]
+
+ @raises(TypeError)
+ def testit(args):
+ t = self.client.transaction()
+ t.create(*args)
+
+ for args in args_list:
+ testit(args)
+
+ def test_default_acl(self):
+ from kazoo.security import make_digest_acl
+ username = uuid.uuid4().hex
+ password = uuid.uuid4().hex
+
+ digest_auth = "%s:%s" % (username, password)
+ acl = make_digest_acl(username, password, all=True)
+
+ self.client.add_auth("digest", digest_auth)
+ self.client.default_acl = (acl,)
+
+ t = self.client.transaction()
+ t.create('/freddy')
+ results = t.commit()
+ eq_(results[0], '/freddy')
+
+ def test_basic_delete(self):
+ self.client.create('/fred')
+ t = self.client.transaction()
+ t.delete('/fred')
+ results = t.commit()
+ eq_(results[0], True)
+
+ def test_bad_deletes(self):
+ args_list = [(True,), ('/smith', 'woops'), ]
+
+ @raises(TypeError)
+ def testit(args):
+ t = self.client.transaction()
+ t.delete(*args)
+
+ for args in args_list:
+ testit(args)
+
+ def test_set(self):
+ self.client.create('/fred', b'01')
+ t = self.client.transaction()
+ t.set_data('/fred', b'oops')
+ t.commit()
+ res = self.client.get('/fred')
+ eq_(res[0], b'oops')
+
+ def test_bad_sets(self):
+ args_list = [(42, 52), ('/smith', False), ('/smith', b'', 'oops')]
+
+ @raises(TypeError)
+ def testit(args):
+ t = self.client.transaction()
+ t.set_data(*args)
+
+ for args in args_list:
+ testit(args)
+
+ def test_check(self):
+ self.client.create('/fred')
+ version = self.client.get('/fred')[1].version
+ t = self.client.transaction()
+ t.check('/fred', version)
+ t.create('/blah')
+ results = t.commit()
+ eq_(results[0], True)
+ eq_(results[1], '/blah')
+
+ def test_bad_checks(self):
+ args_list = [(42, 52), ('/smith', 'oops')]
+
+ @raises(TypeError)
+ def testit(args):
+ t = self.client.transaction()
+ t.check(*args)
+
+ for args in args_list:
+ testit(args)
+
+ def test_bad_transaction(self):
+ from kazoo.exceptions import RolledBackError, NoNodeError
+ t = self.client.transaction()
+ t.create('/fred')
+ t.delete('/smith')
+ results = t.commit()
+ eq_(results[0].__class__, RolledBackError)
+ eq_(results[1].__class__, NoNodeError)
+
+ def test_bad_commit(self):
+ t = self.client.transaction()
+
+ @raises(ValueError)
+ def testit():
+ t.commit()
+
+ t.committed = True
+ testit()
+
+ def test_bad_context(self):
+ @raises(TypeError)
+ def testit():
+ with self.client.transaction() as t:
+ t.check(4232)
+ testit()
+
+ def test_context(self):
+ with self.client.transaction() as t:
+ t.create('/smith', b'32')
+ eq_(self.client.get('/smith')[0], b'32')
+
+
+class TestCallbacks(unittest.TestCase):
+ def test_session_callback_states(self):
+ from kazoo.protocol.states import KazooState, KeeperState
+ from kazoo.client import KazooClient
+
+ client = KazooClient()
+ client._handle = 1
+ client._live.set()
+
+ result = client._session_callback(KeeperState.CONNECTED)
+ eq_(result, None)
+
+ # Now with stopped
+ client._stopped.set()
+ result = client._session_callback(KeeperState.CONNECTED)
+ eq_(result, None)
+
+ # Test several state transitions
+ client._stopped.clear()
+ client.start_async = lambda: True
+ client._session_callback(KeeperState.CONNECTED)
+ eq_(client.state, KazooState.CONNECTED)
+
+ client._session_callback(KeeperState.AUTH_FAILED)
+ eq_(client.state, KazooState.LOST)
+
+ client._handle = 1
+ client._session_callback(-250)
+ eq_(client.state, KazooState.SUSPENDED)
+
+
+class TestNonChrootClient(KazooTestCase):
+
+ def test_create(self):
+ client = self._get_nonchroot_client()
+ self.assertEqual(client.chroot, '')
+ client.start()
+ node = uuid.uuid4().hex
+ path = client.create(node, ephemeral=True)
+ client.delete(path)
+ client.stop()
+
+ def test_unchroot(self):
+ client = self._get_nonchroot_client()
+ client.chroot = '/a'
+ self.assertEquals(client.unchroot('/a/b'), '/b')
+ self.assertEquals(client.unchroot('/b/c'), '/b/c')
diff --git a/slider-agent/src/main/python/kazoo/tests/test_connection.py b/slider-agent/src/main/python/kazoo/tests/test_connection.py
new file mode 100644
index 0000000..c764b03
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/tests/test_connection.py
@@ -0,0 +1,320 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+from collections import namedtuple
+import os
+import errno
+import threading
+import time
+import uuid
+import struct
+
+from nose import SkipTest
+from nose.tools import eq_
+from nose.tools import raises
+import mock
+
+from kazoo.exceptions import ConnectionLoss
+from kazoo.protocol.serialization import (
+ Connect,
+ int_struct,
+ write_string,
+)
+from kazoo.protocol.states import KazooState
+from kazoo.protocol.connection import _CONNECTION_DROP
+from kazoo.testing import KazooTestCase
+from kazoo.tests.util import wait
+from kazoo.tests.util import TRAVIS_ZK_VERSION
+
+
+class Delete(namedtuple('Delete', 'path version')):
+ type = 2
+
+ def serialize(self):
+ b = bytearray()
+ b.extend(write_string(self.path))
+ b.extend(int_struct.pack(self.version))
+ return b
+
+ @classmethod
+ def deserialize(self, bytes, offset):
+ raise ValueError("oh my")
+
+
+class TestConnectionHandler(KazooTestCase):
+ def test_bad_deserialization(self):
+ async_object = self.client.handler.async_result()
+ self.client._queue.append((Delete(self.client.chroot, -1), async_object))
+ os.write(self.client._connection._write_pipe, b'\0')
+
+ @raises(ValueError)
+ def testit():
+ async_object.get()
+ testit()
+
+ def test_with_bad_sessionid(self):
+ ev = threading.Event()
+
+ def expired(state):
+ if state == KazooState.CONNECTED:
+ ev.set()
+
+ password = os.urandom(16)
+ client = self._get_client(client_id=(82838284824, password))
+ client.add_listener(expired)
+ client.start()
+ try:
+ ev.wait(15)
+ eq_(ev.is_set(), True)
+ finally:
+ client.stop()
+
+ def test_connection_read_timeout(self):
+ client = self.client
+ ev = threading.Event()
+ path = "/" + uuid.uuid4().hex
+ handler = client.handler
+ _select = handler.select
+ _socket = client._connection._socket
+
+ def delayed_select(*args, **kwargs):
+ result = _select(*args, **kwargs)
+ if len(args[0]) == 1 and _socket in args[0]:
+ # for any socket read, simulate a timeout
+ return [], [], []
+ return result
+
+ def back(state):
+ if state == KazooState.CONNECTED:
+ ev.set()
+
+ client.add_listener(back)
+ client.create(path, b"1")
+ try:
+ handler.select = delayed_select
+ self.assertRaises(ConnectionLoss, client.get, path)
+ finally:
+ handler.select = _select
+ # the client reconnects automatically
+ ev.wait(5)
+ eq_(ev.is_set(), True)
+ eq_(client.get(path)[0], b"1")
+
+ def test_connection_write_timeout(self):
+ client = self.client
+ ev = threading.Event()
+ path = "/" + uuid.uuid4().hex
+ handler = client.handler
+ _select = handler.select
+ _socket = client._connection._socket
+
+ def delayed_select(*args, **kwargs):
+ result = _select(*args, **kwargs)
+ if _socket in args[1]:
+ # for any socket write, simulate a timeout
+ return [], [], []
+ return result
+
+ def back(state):
+ if state == KazooState.CONNECTED:
+ ev.set()
+ client.add_listener(back)
+
+ try:
+ handler.select = delayed_select
+ self.assertRaises(ConnectionLoss, client.create, path)
+ finally:
+ handler.select = _select
+ # the client reconnects automatically
+ ev.wait(5)
+ eq_(ev.is_set(), True)
+ eq_(client.exists(path), None)
+
+ def test_connection_deserialize_fail(self):
+ client = self.client
+ ev = threading.Event()
+ path = "/" + uuid.uuid4().hex
+ handler = client.handler
+ _select = handler.select
+ _socket = client._connection._socket
+
+ def delayed_select(*args, **kwargs):
+ result = _select(*args, **kwargs)
+ if _socket in args[1]:
+ # for any socket write, simulate a timeout
+ return [], [], []
+ return result
+
+ def back(state):
+ if state == KazooState.CONNECTED:
+ ev.set()
+ client.add_listener(back)
+
+ deserialize_ev = threading.Event()
+
+ def bad_deserialize(bytes, offset):
+ deserialize_ev.set()
+ raise struct.error()
+
+ # force the connection to die but, on reconnect, cause the
+ # server response to be non-deserializable. ensure that the client
+ # continues to retry. This partially reproduces a rare bug seen
+ # in production.
+
+ with mock.patch.object(Connect, 'deserialize') as mock_deserialize:
+ mock_deserialize.side_effect = bad_deserialize
+ try:
+ handler.select = delayed_select
+ self.assertRaises(ConnectionLoss, client.create, path)
+ finally:
+ handler.select = _select
+ # the client reconnects automatically but the first attempt will
+ # hit a deserialize failure. wait for that.
+ deserialize_ev.wait(5)
+ eq_(deserialize_ev.is_set(), True)
+
+ # this time should succeed
+ ev.wait(5)
+ eq_(ev.is_set(), True)
+ eq_(client.exists(path), None)
+
+ def test_connection_close(self):
+ self.assertRaises(Exception, self.client.close)
+ self.client.stop()
+ self.client.close()
+
+ # should be able to restart
+ self.client.start()
+
+ def test_connection_pipe(self):
+ client = self.client
+ read_pipe = client._connection._read_pipe
+ write_pipe = client._connection._write_pipe
+
+ assert read_pipe is not None
+ assert write_pipe is not None
+
+ # stop client and pipe should not yet be closed
+ client.stop()
+ assert read_pipe is not None
+ assert write_pipe is not None
+ os.fstat(read_pipe)
+ os.fstat(write_pipe)
+
+ # close client, and pipes should be
+ client.close()
+
+ try:
+ os.fstat(read_pipe)
+ except OSError as e:
+ if not e.errno == errno.EBADF:
+ raise
+ else:
+ self.fail("Expected read_pipe to be closed")
+
+ try:
+ os.fstat(write_pipe)
+ except OSError as e:
+ if not e.errno == errno.EBADF:
+ raise
+ else:
+ self.fail("Expected write_pipe to be closed")
+
+ # start client back up. should get a new, valid pipe
+ client.start()
+ read_pipe = client._connection._read_pipe
+ write_pipe = client._connection._write_pipe
+
+ assert read_pipe is not None
+ assert write_pipe is not None
+ os.fstat(read_pipe)
+ os.fstat(write_pipe)
+
+ def test_dirty_pipe(self):
+ client = self.client
+ read_pipe = client._connection._read_pipe
+ write_pipe = client._connection._write_pipe
+
+ # add a stray byte to the pipe and ensure that doesn't
+ # blow up client. simulates case where some error leaves
+ # a byte in the pipe which doesn't correspond to the
+ # request queue.
+ os.write(write_pipe, b'\0')
+
+ # eventually this byte should disappear from pipe
+ wait(lambda: client.handler.select([read_pipe], [], [], 0)[0] == [])
+
+
+class TestConnectionDrop(KazooTestCase):
+ def test_connection_dropped(self):
+ ev = threading.Event()
+
+ def back(state):
+ if state == KazooState.CONNECTED:
+ ev.set()
+
+ # create a node with a large value and stop the ZK node
+ path = "/" + uuid.uuid4().hex
+ self.client.create(path)
+ self.client.add_listener(back)
+ result = self.client.set_async(path, b'a' * 1000 * 1024)
+ self.client._call(_CONNECTION_DROP, None)
+
+ self.assertRaises(ConnectionLoss, result.get)
+ # we have a working connection to a new node
+ ev.wait(30)
+ eq_(ev.is_set(), True)
+
+
+class TestReadOnlyMode(KazooTestCase):
+
+ def setUp(self):
+ self.setup_zookeeper(read_only=True)
+ skip = False
+ if TRAVIS_ZK_VERSION and TRAVIS_ZK_VERSION < (3, 4):
+ skip = True
+ elif TRAVIS_ZK_VERSION and TRAVIS_ZK_VERSION >= (3, 4):
+ skip = False
+ else:
+ ver = self.client.server_version()
+ if ver[1] < 4:
+ skip = True
+ if skip:
+ raise SkipTest("Must use Zookeeper 3.4 or above")
+
+ def tearDown(self):
+ self.client.stop()
+
+ def test_read_only(self):
+ from kazoo.exceptions import NotReadOnlyCallError
+ from kazoo.protocol.states import KeeperState
+
+ client = self.client
+ states = []
+ ev = threading.Event()
+
+ @client.add_listener
+ def listen(state):
+ states.append(state)
+ if client.client_state == KeeperState.CONNECTED_RO:
+ ev.set()
+ try:
+ self.cluster[1].stop()
+ self.cluster[2].stop()
+ ev.wait(6)
+ eq_(ev.is_set(), True)
+ eq_(client.client_state, KeeperState.CONNECTED_RO)
+
+ # Test read only command
+ eq_(client.get_children('/'), [])
+
+ # Test error with write command
+ @raises(NotReadOnlyCallError)
+ def testit():
+ client.create('/fred')
+ testit()
+
+ # Wait for a ping
+ time.sleep(15)
+ finally:
+ client.remove_listener(listen)
+ self.cluster[1].run()
+ self.cluster[2].run()
diff --git a/slider-agent/src/main/python/kazoo/tests/test_counter.py b/slider-agent/src/main/python/kazoo/tests/test_counter.py
new file mode 100644
index 0000000..b0361d0
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/tests/test_counter.py
@@ -0,0 +1,36 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+import uuid
+
+from nose.tools import eq_
+
+from kazoo.testing import KazooTestCase
+
+
+class KazooCounterTests(KazooTestCase):
+
+ def _makeOne(self, **kw):
+ path = "/" + uuid.uuid4().hex
+ return self.client.Counter(path, **kw)
+
+ def test_int_counter(self):
+ counter = self._makeOne()
+ eq_(counter.value, 0)
+ counter += 2
+ counter + 1
+ eq_(counter.value, 3)
+ counter -= 3
+ counter - 1
+ eq_(counter.value, -1)
+
+ def test_float_counter(self):
+ counter = self._makeOne(default=0.0)
+ eq_(counter.value, 0.0)
+ counter += 2.1
+ eq_(counter.value, 2.1)
+ counter -= 3.1
+ eq_(counter.value, -1.0)
+
+ def test_errors(self):
+ counter = self._makeOne()
+ self.assertRaises(TypeError, counter.__add__, 2.1)
+ self.assertRaises(TypeError, counter.__add__, b"a")
diff --git a/slider-agent/src/main/python/kazoo/tests/test_election.py b/slider-agent/src/main/python/kazoo/tests/test_election.py
new file mode 100644
index 0000000..a9610bf
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/tests/test_election.py
@@ -0,0 +1,140 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+import uuid
+import sys
+import threading
+
+from nose.tools import eq_
+
+from kazoo.testing import KazooTestCase
+from kazoo.tests.util import wait
+
+
+class UniqueError(Exception):
+ """Error raised only by test leader function
+ """
+
+
+class KazooElectionTests(KazooTestCase):
+ def setUp(self):
+ super(KazooElectionTests, self).setUp()
+ self.path = "/" + uuid.uuid4().hex
+
+ self.condition = threading.Condition()
+
+ # election contenders set these when elected. The exit event is set by
+ # the test to make the leader exit.
+ self.leader_id = None
+ self.exit_event = None
+
+ # tests set this before the event to make the leader raise an error
+ self.raise_exception = False
+
+ # set by a worker thread when an unexpected error is hit.
+ # better way to do this?
+ self.thread_exc_info = None
+
+ def _spawn_contender(self, contender_id, election):
+ thread = threading.Thread(target=self._election_thread,
+ args=(contender_id, election))
+ thread.daemon = True
+ thread.start()
+ return thread
+
+ def _election_thread(self, contender_id, election):
+ try:
+ election.run(self._leader_func, contender_id)
+ except UniqueError:
+ if not self.raise_exception:
+ self.thread_exc_info = sys.exc_info()
+ except Exception:
+ self.thread_exc_info = sys.exc_info()
+ else:
+ if self.raise_exception:
+ e = Exception("expected leader func to raise exception")
+ self.thread_exc_info = (Exception, e, None)
+
+ def _leader_func(self, name):
+ exit_event = threading.Event()
+ with self.condition:
+ self.exit_event = exit_event
+ self.leader_id = name
+ self.condition.notify_all()
+
+ exit_event.wait(45)
+ if self.raise_exception:
+ raise UniqueError("expected error in the leader function")
+
+ def _check_thread_error(self):
+ if self.thread_exc_info:
+ t, o, tb = self.thread_exc_info
+ raise t(o)
+
+ def test_election(self):
+ elections = {}
+ threads = {}
+ for _ in range(3):
+ contender = "c" + uuid.uuid4().hex
+ elections[contender] = self.client.Election(self.path, contender)
+ threads[contender] = self._spawn_contender(contender,
+ elections[contender])
+
+ # wait for a leader to be elected
+ times = 0
+ with self.condition:
+ while not self.leader_id:
+ self.condition.wait(5)
+ times += 1
+ if times > 5:
+ raise Exception("Still not a leader: lid: %s",
+ self.leader_id)
+
+ election = self.client.Election(self.path)
+
+ # make sure all contenders are in the pool
+ wait(lambda: len(election.contenders()) == len(elections))
+ contenders = election.contenders()
+
+ eq_(set(contenders), set(elections.keys()))
+
+ # first one in list should be leader
+ first_leader = contenders[0]
+ eq_(first_leader, self.leader_id)
+
+ # tell second one to cancel election. should never get elected.
+ elections[contenders[1]].cancel()
+
+ # make leader exit. third contender should be elected.
+ self.exit_event.set()
+ with self.condition:
+ while self.leader_id == first_leader:
+ self.condition.wait(45)
+ eq_(self.leader_id, contenders[2])
+ self._check_thread_error()
+
+ # make first contender re-enter the race
+ threads[first_leader].join()
+ threads[first_leader] = self._spawn_contender(first_leader,
+ elections[first_leader])
+
+ # contender set should now be the current leader plus the first leader
+ wait(lambda: len(election.contenders()) == 2)
+ contenders = election.contenders()
+ eq_(set(contenders), set([self.leader_id, first_leader]))
+
+ # make current leader raise an exception. first should be reelected
+ self.raise_exception = True
+ self.exit_event.set()
+ with self.condition:
+ while self.leader_id != first_leader:
+ self.condition.wait(45)
+ eq_(self.leader_id, first_leader)
+ self._check_thread_error()
+
+ self.exit_event.set()
+ for thread in threads.values():
+ thread.join()
+ self._check_thread_error()
+
+ def test_bad_func(self):
+ election = self.client.Election(self.path)
+ self.assertRaises(ValueError, election.run, "not a callable")
diff --git a/slider-agent/src/main/python/kazoo/tests/test_exceptions.py b/slider-agent/src/main/python/kazoo/tests/test_exceptions.py
new file mode 100644
index 0000000..e469089
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/tests/test_exceptions.py
@@ -0,0 +1,23 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+from unittest import TestCase
+
+
+class ExceptionsTestCase(TestCase):
+
+ def _get(self):
+ from kazoo import exceptions
+ return exceptions
+
+ def test_backwards_alias(self):
+ module = self._get()
+ self.assertTrue(getattr(module, 'NoNodeException'))
+ self.assertTrue(module.NoNodeException, module.NoNodeError)
+
+ def test_exceptions_code(self):
+ module = self._get()
+ exc_8 = module.EXCEPTIONS[-8]
+ self.assertTrue(isinstance(exc_8(), module.BadArgumentsError))
+
+ def test_invalid_code(self):
+ module = self._get()
+ self.assertRaises(RuntimeError, module.EXCEPTIONS.__getitem__, 666)
diff --git a/slider-agent/src/main/python/kazoo/tests/test_gevent_handler.py b/slider-agent/src/main/python/kazoo/tests/test_gevent_handler.py
new file mode 100644
index 0000000..71d9727
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/tests/test_gevent_handler.py
@@ -0,0 +1,161 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+import unittest
+
+from nose import SkipTest
+from nose.tools import eq_
+from nose.tools import raises
+
+from kazoo.client import KazooClient
+from kazoo.exceptions import NoNodeError
+from kazoo.protocol.states import Callback
+from kazoo.testing import KazooTestCase
+from kazoo.tests import test_client
+
+
+class TestGeventHandler(unittest.TestCase):
+
+ def setUp(self):
+ try:
+ import gevent
+ except ImportError:
+ raise SkipTest('gevent not available.')
+
+ def _makeOne(self, *args):
+ from kazoo.handlers.gevent import SequentialGeventHandler
+ return SequentialGeventHandler(*args)
+
+ def _getAsync(self, *args):
+ from kazoo.handlers.gevent import AsyncResult
+ return AsyncResult
+
+ def _getEvent(self):
+ from gevent.event import Event
+ return Event
+
+ def test_proper_threading(self):
+ h = self._makeOne()
+ h.start()
+ assert isinstance(h.event_object(), self._getEvent())
+
+ def test_matching_async(self):
+ h = self._makeOne()
+ h.start()
+ async = self._getAsync()
+ assert isinstance(h.async_result(), async)
+
+ def test_exception_raising(self):
+ h = self._makeOne()
+
+ @raises(h.timeout_exception)
+ def testit():
+ raise h.timeout_exception("This is a timeout")
+ testit()
+
+ def test_exception_in_queue(self):
+ h = self._makeOne()
+ h.start()
+ ev = self._getEvent()()
+
+ def func():
+ ev.set()
+ raise ValueError('bang')
+
+ call1 = Callback('completion', func, ())
+ h.dispatch_callback(call1)
+ ev.wait()
+
+ def test_queue_empty_exception(self):
+ from gevent.queue import Empty
+ h = self._makeOne()
+ h.start()
+ ev = self._getEvent()()
+
+ def func():
+ ev.set()
+ raise Empty()
+
+ call1 = Callback('completion', func, ())
+ h.dispatch_callback(call1)
+ ev.wait()
+
+
+class TestBasicGeventClient(KazooTestCase):
+
+ def setUp(self):
+ try:
+ import gevent
+ except ImportError:
+ raise SkipTest('gevent not available.')
+ KazooTestCase.setUp(self)
+
+ def _makeOne(self, *args):
+ from kazoo.handlers.gevent import SequentialGeventHandler
+ return SequentialGeventHandler(*args)
+
+ def _getEvent(self):
+ from gevent.event import Event
+ return Event
+
+ def test_start(self):
+ client = self._get_client(handler=self._makeOne())
+ client.start()
+ self.assertEqual(client.state, 'CONNECTED')
+ client.stop()
+
+ def test_start_stop_double(self):
+ client = self._get_client(handler=self._makeOne())
+ client.start()
+ self.assertEqual(client.state, 'CONNECTED')
+ client.handler.start()
+ client.handler.stop()
+ client.stop()
+
+ def test_basic_commands(self):
+ client = self._get_client(handler=self._makeOne())
+ client.start()
+ self.assertEqual(client.state, 'CONNECTED')
+ client.create('/anode', 'fred')
+ eq_(client.get('/anode')[0], 'fred')
+ eq_(client.delete('/anode'), True)
+ eq_(client.exists('/anode'), None)
+ client.stop()
+
+ def test_failures(self):
+ client = self._get_client(handler=self._makeOne())
+ client.start()
+ self.assertRaises(NoNodeError, client.get, '/none')
+ client.stop()
+
+ def test_data_watcher(self):
+ client = self._get_client(handler=self._makeOne())
+ client.start()
+ client.ensure_path('/some/node')
+ ev = self._getEvent()()
+
+ @client.DataWatch('/some/node')
+ def changed(d, stat):
+ ev.set()
+
+ ev.wait()
+ ev.clear()
+ client.set('/some/node', 'newvalue')
+ ev.wait()
+ client.stop()
+
+
+class TestGeventClient(test_client.TestClient):
+
+ def setUp(self):
+ try:
+ import gevent
+ except ImportError:
+ raise SkipTest('gevent not available.')
+ KazooTestCase.setUp(self)
+
+ def _makeOne(self, *args):
+ from kazoo.handlers.gevent import SequentialGeventHandler
+ return SequentialGeventHandler(*args)
+
+ def _get_client(self, **kwargs):
+ kwargs["handler"] = self._makeOne()
+ return KazooClient(self.hosts, **kwargs)
diff --git a/slider-agent/src/main/python/kazoo/tests/test_lock.py b/slider-agent/src/main/python/kazoo/tests/test_lock.py
new file mode 100644
index 0000000..6dd15b0
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/tests/test_lock.py
@@ -0,0 +1,518 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+import uuid
+import threading
+
+from nose.tools import eq_, ok_
+
+from kazoo.exceptions import CancelledError
+from kazoo.exceptions import LockTimeout
+from kazoo.testing import KazooTestCase
+from kazoo.tests.util import wait
+
+
+class KazooLockTests(KazooTestCase):
+ def setUp(self):
+ super(KazooLockTests, self).setUp()
+ self.lockpath = "/" + uuid.uuid4().hex
+
+ self.condition = threading.Condition()
+ self.released = threading.Event()
+ self.active_thread = None
+ self.cancelled_threads = []
+
+ def _thread_lock_acquire_til_event(self, name, lock, event):
+ try:
+ with lock:
+ with self.condition:
+ eq_(self.active_thread, None)
+ self.active_thread = name
+ self.condition.notify_all()
+
+ event.wait()
+
+ with self.condition:
+ eq_(self.active_thread, name)
+ self.active_thread = None
+ self.condition.notify_all()
+ self.released.set()
+ except CancelledError:
+ with self.condition:
+ self.cancelled_threads.append(name)
+ self.condition.notify_all()
+
+ def test_lock_one(self):
+ lock_name = uuid.uuid4().hex
+ lock = self.client.Lock(self.lockpath, lock_name)
+ event = threading.Event()
+
+ thread = threading.Thread(target=self._thread_lock_acquire_til_event,
+ args=(lock_name, lock, event))
+ thread.start()
+
+ lock2_name = uuid.uuid4().hex
+ anotherlock = self.client.Lock(self.lockpath, lock2_name)
+
+ # wait for any contender to show up on the lock
+ wait(anotherlock.contenders)
+ eq_(anotherlock.contenders(), [lock_name])
+
+ with self.condition:
+ while self.active_thread != lock_name:
+ self.condition.wait()
+
+ # release the lock
+ event.set()
+
+ with self.condition:
+ while self.active_thread:
+ self.condition.wait()
+ self.released.wait()
+ thread.join()
+
+ def test_lock(self):
+ threads = []
+ names = ["contender" + str(i) for i in range(5)]
+
+ contender_bits = {}
+
+ for name in names:
+ e = threading.Event()
+
+ l = self.client.Lock(self.lockpath, name)
+ t = threading.Thread(target=self._thread_lock_acquire_til_event,
+ args=(name, l, e))
+ contender_bits[name] = (t, e)
+ threads.append(t)
+
+ # acquire the lock ourselves first to make the others line up
+ lock = self.client.Lock(self.lockpath, "test")
+ lock.acquire()
+
+ for t in threads:
+ t.start()
+
+ # wait for everyone to line up on the lock
+ wait(lambda: len(lock.contenders()) == 6)
+ contenders = lock.contenders()
+
+ eq_(contenders[0], "test")
+ contenders = contenders[1:]
+ remaining = list(contenders)
+
+ # release the lock and contenders should claim it in order
+ lock.release()
+
+ for contender in contenders:
+ thread, event = contender_bits[contender]
+
+ with self.condition:
+ while not self.active_thread:
+ self.condition.wait()
+ eq_(self.active_thread, contender)
+
+ eq_(lock.contenders(), remaining)
+ remaining = remaining[1:]
+
+ event.set()
+
+ with self.condition:
+ while self.active_thread:
+ self.condition.wait()
+ for thread in threads:
+ thread.join()
+
+ def test_lock_reconnect(self):
+ event = threading.Event()
+ other_lock = self.client.Lock(self.lockpath, 'contender')
+ thread = threading.Thread(target=self._thread_lock_acquire_til_event,
+ args=('contender', other_lock, event))
+
+ # acquire the lock ourselves first to make the contender line up
+ lock = self.client.Lock(self.lockpath, "test")
+ lock.acquire()
+
+ thread.start()
+ # wait for the contender to line up on the lock
+ wait(lambda: len(lock.contenders()) == 2)
+ eq_(lock.contenders(), ['test', 'contender'])
+
+ self.expire_session()
+
+ lock.release()
+
+ with self.condition:
+ while not self.active_thread:
+ self.condition.wait()
+ eq_(self.active_thread, 'contender')
+
+ event.set()
+ thread.join()
+
+ def test_lock_non_blocking(self):
+ lock_name = uuid.uuid4().hex
+ lock = self.client.Lock(self.lockpath, lock_name)
+ event = threading.Event()
+
+ thread = threading.Thread(target=self._thread_lock_acquire_til_event,
+ args=(lock_name, lock, event))
+ thread.start()
+
+ lock1 = self.client.Lock(self.lockpath, lock_name)
+
+ # wait for the thread to acquire the lock
+ with self.condition:
+ if not self.active_thread:
+ self.condition.wait(5)
+
+ ok_(not lock1.acquire(blocking=False))
+ eq_(lock.contenders(), [lock_name]) # just one - itself
+
+ event.set()
+ thread.join()
+
+ def test_lock_fail_first_call(self):
+ event1 = threading.Event()
+ lock1 = self.client.Lock(self.lockpath, "one")
+ thread1 = threading.Thread(target=self._thread_lock_acquire_til_event,
+ args=("one", lock1, event1))
+ thread1.start()
+
+ # wait for this thread to acquire the lock
+ with self.condition:
+ if not self.active_thread:
+ self.condition.wait(5)
+ eq_(self.active_thread, "one")
+ eq_(lock1.contenders(), ["one"])
+ event1.set()
+ thread1.join()
+
+ def test_lock_cancel(self):
+ event1 = threading.Event()
+ lock1 = self.client.Lock(self.lockpath, "one")
+ thread1 = threading.Thread(target=self._thread_lock_acquire_til_event,
+ args=("one", lock1, event1))
+ thread1.start()
+
+ # wait for this thread to acquire the lock
+ with self.condition:
+ if not self.active_thread:
+ self.condition.wait(5)
+ eq_(self.active_thread, "one")
+
+ client2 = self._get_client()
+ client2.start()
+ event2 = threading.Event()
+ lock2 = client2.Lock(self.lockpath, "two")
+ thread2 = threading.Thread(target=self._thread_lock_acquire_til_event,
+ args=("two", lock2, event2))
+ thread2.start()
+
+ # this one should block in acquire. check that it is a contender
+ wait(lambda: len(lock2.contenders()) > 1)
+ eq_(lock2.contenders(), ["one", "two"])
+
+ lock2.cancel()
+ with self.condition:
+ if not "two" in self.cancelled_threads:
+ self.condition.wait()
+ assert "two" in self.cancelled_threads
+
+ eq_(lock2.contenders(), ["one"])
+
+ thread2.join()
+ event1.set()
+ thread1.join()
+ client2.stop()
+
+ def test_lock_double_calls(self):
+ lock1 = self.client.Lock(self.lockpath, "one")
+ lock1.acquire()
+ lock1.acquire()
+ lock1.release()
+ lock1.release()
+
+ def test_lock_reacquire(self):
+ lock = self.client.Lock(self.lockpath, "one")
+ lock.acquire()
+ lock.release()
+ lock.acquire()
+ lock.release()
+
+ def test_lock_timeout(self):
+ timeout = 3
+ e = threading.Event()
+ started = threading.Event()
+
+ # In the background thread, acquire the lock and wait thrice the time
+ # that the main thread is going to wait to acquire the lock.
+ lock1 = self.client.Lock(self.lockpath, "one")
+
+ def _thread(lock, event, timeout):
+ with lock:
+ started.set()
+ event.wait(timeout)
+ if not event.isSet():
+ # Eventually fail to avoid hanging the tests
+ self.fail("lock2 never timed out")
+
+ t = threading.Thread(target=_thread, args=(lock1, e, timeout * 3))
+ t.start()
+
+ # Start the main thread's kazoo client and try to acquire the lock
+ # but give up after `timeout` seconds
+ client2 = self._get_client()
+ client2.start()
+ started.wait(5)
+ self.assertTrue(started.isSet())
+ lock2 = client2.Lock(self.lockpath, "two")
+ try:
+ lock2.acquire(timeout=timeout)
+ except LockTimeout:
+ # A timeout is the behavior we're expecting, since the background
+ # thread should still be holding onto the lock
+ pass
+ else:
+ self.fail("Main thread unexpectedly acquired the lock")
+ finally:
+ # Cleanup
+ e.set()
+ t.join()
+ client2.stop()
+
+
+class TestSemaphore(KazooTestCase):
+ def setUp(self):
+ super(TestSemaphore, self).setUp()
+ self.lockpath = "/" + uuid.uuid4().hex
+
+ self.condition = threading.Condition()
+ self.released = threading.Event()
+ self.active_thread = None
+ self.cancelled_threads = []
+
+ def test_basic(self):
+ sem1 = self.client.Semaphore(self.lockpath)
+ sem1.acquire()
+ sem1.release()
+
+ def test_lock_one(self):
+ sem1 = self.client.Semaphore(self.lockpath, max_leases=1)
+ sem2 = self.client.Semaphore(self.lockpath, max_leases=1)
+ started = threading.Event()
+ event = threading.Event()
+
+ sem1.acquire()
+
+ def sema_one():
+ started.set()
+ with sem2:
+ event.set()
+
+ thread = threading.Thread(target=sema_one, args=())
+ thread.start()
+ started.wait(10)
+
+ self.assertFalse(event.is_set())
+
+ sem1.release()
+ event.wait(10)
+ self.assert_(event.is_set())
+ thread.join()
+
+ def test_non_blocking(self):
+ sem1 = self.client.Semaphore(
+ self.lockpath, identifier='sem1', max_leases=2)
+ sem2 = self.client.Semaphore(
+ self.lockpath, identifier='sem2', max_leases=2)
+ sem3 = self.client.Semaphore(
+ self.lockpath, identifier='sem3', max_leases=2)
+
+ sem1.acquire()
+ sem2.acquire()
+ ok_(not sem3.acquire(blocking=False))
+ eq_(set(sem1.lease_holders()), set(['sem1', 'sem2']))
+ sem2.release()
+ # the next line isn't required, but avoids timing issues in tests
+ sem3.acquire()
+ eq_(set(sem1.lease_holders()), set(['sem1', 'sem3']))
+ sem1.release()
+ sem3.release()
+
+ def test_non_blocking_release(self):
+ sem1 = self.client.Semaphore(
+ self.lockpath, identifier='sem1', max_leases=1)
+ sem2 = self.client.Semaphore(
+ self.lockpath, identifier='sem2', max_leases=1)
+ sem1.acquire()
+ sem2.acquire(blocking=False)
+
+ # make sure there's no shutdown / cleanup error
+ sem1.release()
+ sem2.release()
+
+ def test_holders(self):
+ started = threading.Event()
+ event = threading.Event()
+
+ def sema_one():
+ with self.client.Semaphore(self.lockpath, 'fred', max_leases=1):
+ started.set()
+ event.wait()
+
+ thread = threading.Thread(target=sema_one, args=())
+ thread.start()
+ started.wait()
+ sem1 = self.client.Semaphore(self.lockpath)
+ holders = sem1.lease_holders()
+ eq_(holders, ['fred'])
+ event.set()
+ thread.join()
+
+ def test_semaphore_cancel(self):
+ sem1 = self.client.Semaphore(self.lockpath, 'fred', max_leases=1)
+ sem2 = self.client.Semaphore(self.lockpath, 'george', max_leases=1)
+ sem1.acquire()
+ started = threading.Event()
+ event = threading.Event()
+
+ def sema_one():
+ started.set()
+ try:
+ with sem2:
+ started.set()
+ except CancelledError:
+ event.set()
+
+ thread = threading.Thread(target=sema_one, args=())
+ thread.start()
+ started.wait()
+ eq_(sem1.lease_holders(), ['fred'])
+ eq_(event.is_set(), False)
+ sem2.cancel()
+ event.wait()
+ eq_(event.is_set(), True)
+ thread.join()
+
+ def test_multiple_acquire_and_release(self):
+ sem1 = self.client.Semaphore(self.lockpath, 'fred', max_leases=1)
+ sem1.acquire()
+ sem1.acquire()
+
+ eq_(True, sem1.release())
+ eq_(False, sem1.release())
+
+ def test_handle_session_loss(self):
+ expire_semaphore = self.client.Semaphore(self.lockpath, 'fred',
+ max_leases=1)
+
+ client = self._get_client()
+ client.start()
+ lh_semaphore = client.Semaphore(self.lockpath, 'george', max_leases=1)
+ lh_semaphore.acquire()
+
+ started = threading.Event()
+ event = threading.Event()
+ event2 = threading.Event()
+
+ def sema_one():
+ started.set()
+ with expire_semaphore:
+ event.set()
+ event2.wait()
+
+ thread = threading.Thread(target=sema_one, args=())
+ thread.start()
+
+ started.wait()
+ eq_(lh_semaphore.lease_holders(), ['george'])
+
+ # Fired in a separate thread to make sure we can see the effect
+ expired = threading.Event()
+
+ def expire():
+ self.expire_session()
+ expired.set()
+
+ thread = threading.Thread(target=expire, args=())
+ thread.start()
+ expire_semaphore.wake_event.wait()
+ expired.wait()
+
+ lh_semaphore.release()
+ client.stop()
+
+ event.wait(5)
+ eq_(expire_semaphore.lease_holders(), ['fred'])
+ event2.set()
+ thread.join()
+
+ def test_inconsistent_max_leases(self):
+ sem1 = self.client.Semaphore(self.lockpath, max_leases=1)
+ sem2 = self.client.Semaphore(self.lockpath, max_leases=2)
+
+ sem1.acquire()
+ self.assertRaises(ValueError, sem2.acquire)
+
+ def test_inconsistent_max_leases_other_data(self):
+ sem1 = self.client.Semaphore(self.lockpath, max_leases=1)
+ sem2 = self.client.Semaphore(self.lockpath, max_leases=2)
+
+ self.client.ensure_path(self.lockpath)
+ self.client.set(self.lockpath, b'a$')
+
+ sem1.acquire()
+ # sem2 thinks it's ok to have two lease holders
+ ok_(sem2.acquire(blocking=False))
+
+ def test_reacquire(self):
+ lock = self.client.Semaphore(self.lockpath)
+ lock.acquire()
+ lock.release()
+ lock.acquire()
+ lock.release()
+
+ def test_acquire_after_cancelled(self):
+ lock = self.client.Semaphore(self.lockpath)
+ self.assertTrue(lock.acquire())
+ self.assertTrue(lock.release())
+ lock.cancel()
+ self.assertTrue(lock.cancelled)
+ self.assertTrue(lock.acquire())
+
+ def test_timeout(self):
+ timeout = 3
+ e = threading.Event()
+ started = threading.Event()
+
+ # In the background thread, acquire the lock and wait thrice the time
+ # that the main thread is going to wait to acquire the lock.
+ sem1 = self.client.Semaphore(self.lockpath, "one")
+
+ def _thread(sem, event, timeout):
+ with sem:
+ started.set()
+ event.wait(timeout)
+ if not event.isSet():
+ # Eventually fail to avoid hanging the tests
+ self.fail("sem2 never timed out")
+
+ t = threading.Thread(target=_thread, args=(sem1, e, timeout * 3))
+ t.start()
+
+ # Start the main thread's kazoo client and try to acquire the lock
+ # but give up after `timeout` seconds
+ client2 = self._get_client()
+ client2.start()
+ started.wait(5)
+ self.assertTrue(started.isSet())
+ sem2 = client2.Semaphore(self.lockpath, "two")
+ try:
+ sem2.acquire(timeout=timeout)
+ except LockTimeout:
+ # A timeout is the behavior we're expecting, since the background
+ # thread will still be holding onto the lock
+ e.set()
+ finally:
+ # Cleanup
+ t.join()
+ client2.stop()
diff --git a/slider-agent/src/main/python/kazoo/tests/test_partitioner.py b/slider-agent/src/main/python/kazoo/tests/test_partitioner.py
new file mode 100644
index 0000000..1a4f205
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/tests/test_partitioner.py
@@ -0,0 +1,93 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+import uuid
+import time
+
+from nose.tools import eq_
+
+from kazoo.testing import KazooTestCase
+from kazoo.recipe.partitioner import PartitionState
+
+
+class KazooPartitionerTests(KazooTestCase):
+ def setUp(self):
+ super(KazooPartitionerTests, self).setUp()
+ self.path = "/" + uuid.uuid4().hex
+
+ def test_party_of_one(self):
+ partitioner = self.client.SetPartitioner(
+ self.path, set=(1, 2, 3), time_boundary=0.2)
+ partitioner.wait_for_acquire(14)
+ eq_(partitioner.state, PartitionState.ACQUIRED)
+ eq_(list(partitioner), [1, 2, 3])
+ partitioner.finish()
+
+ def test_party_of_two(self):
+ partitioners = [self.client.SetPartitioner(self.path, (1, 2),
+ identifier="p%s" % i, time_boundary=0.2)
+ for i in range(2)]
+
+ partitioners[0].wait_for_acquire(14)
+ partitioners[1].wait_for_acquire(14)
+ eq_(list(partitioners[0]), [1])
+ eq_(list(partitioners[1]), [2])
+ partitioners[0].finish()
+ time.sleep(0.1)
+ eq_(partitioners[1].release, True)
+ partitioners[1].finish()
+
+ def test_party_expansion(self):
+ partitioners = [self.client.SetPartitioner(self.path, (1, 2, 3),
+ identifier="p%s" % i, time_boundary=0.2)
+ for i in range(2)]
+
+ partitioners[0].wait_for_acquire(14)
+ partitioners[1].wait_for_acquire(14)
+ eq_(partitioners[0].state, PartitionState.ACQUIRED)
+ eq_(partitioners[1].state, PartitionState.ACQUIRED)
+
+ eq_(list(partitioners[0]), [1, 3])
+ eq_(list(partitioners[1]), [2])
+
+ # Add another partition, wait till they settle
+ partitioners.append(self.client.SetPartitioner(self.path, (1, 2, 3),
+ identifier="p2", time_boundary=0.2))
+ time.sleep(0.1)
+ eq_(partitioners[0].release, True)
+ for p in partitioners[:-1]:
+ p.release_set()
+
+ for p in partitioners:
+ p.wait_for_acquire(14)
+
+ eq_(list(partitioners[0]), [1])
+ eq_(list(partitioners[1]), [2])
+ eq_(list(partitioners[2]), [3])
+
+ for p in partitioners:
+ p.finish()
+
+ def test_more_members_than_set_items(self):
+ partitioners = [self.client.SetPartitioner(self.path, (1,),
+ identifier="p%s" % i, time_boundary=0.2)
+ for i in range(2)]
+
+ partitioners[0].wait_for_acquire(14)
+ partitioners[1].wait_for_acquire(14)
+ eq_(partitioners[0].state, PartitionState.ACQUIRED)
+ eq_(partitioners[1].state, PartitionState.ACQUIRED)
+
+ eq_(list(partitioners[0]), [1])
+ eq_(list(partitioners[1]), [])
+
+ for p in partitioners:
+ p.finish()
+
+ def test_party_session_failure(self):
+ partitioner = self.client.SetPartitioner(
+ self.path, set=(1, 2, 3), time_boundary=0.2)
+ partitioner.wait_for_acquire(14)
+ eq_(partitioner.state, PartitionState.ACQUIRED)
+ # simulate session failure
+ partitioner._fail_out()
+ partitioner.release_set()
+ self.assertTrue(partitioner.failed)
diff --git a/slider-agent/src/main/python/kazoo/tests/test_party.py b/slider-agent/src/main/python/kazoo/tests/test_party.py
new file mode 100644
index 0000000..61400ae
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/tests/test_party.py
@@ -0,0 +1,85 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+import uuid
+
+from nose.tools import eq_
+
+from kazoo.testing import KazooTestCase
+
+
+class KazooPartyTests(KazooTestCase):
+ def setUp(self):
+ super(KazooPartyTests, self).setUp()
+ self.path = "/" + uuid.uuid4().hex
+
+ def test_party(self):
+ parties = [self.client.Party(self.path, "p%s" % i)
+ for i in range(5)]
+
+ one_party = parties[0]
+
+ eq_(list(one_party), [])
+ eq_(len(one_party), 0)
+
+ participants = set()
+ for party in parties:
+ party.join()
+ participants.add(party.data.decode('utf-8'))
+
+ eq_(set(party), participants)
+ eq_(len(party), len(participants))
+
+ for party in parties:
+ party.leave()
+ participants.remove(party.data.decode('utf-8'))
+
+ eq_(set(party), participants)
+ eq_(len(party), len(participants))
+
+ def test_party_reuse_node(self):
+ party = self.client.Party(self.path, "p1")
+ self.client.ensure_path(self.path)
+ self.client.create(party.create_path)
+ party.join()
+ self.assertTrue(party.participating)
+ party.leave()
+ self.assertFalse(party.participating)
+ self.assertEqual(len(party), 0)
+
+ def test_party_vanishing_node(self):
+ party = self.client.Party(self.path, "p1")
+ party.join()
+ self.assertTrue(party.participating)
+ self.client.delete(party.create_path)
+ party.leave()
+ self.assertFalse(party.participating)
+ self.assertEqual(len(party), 0)
+
+
+class KazooShallowPartyTests(KazooTestCase):
+ def setUp(self):
+ super(KazooShallowPartyTests, self).setUp()
+ self.path = "/" + uuid.uuid4().hex
+
+ def test_party(self):
+ parties = [self.client.ShallowParty(self.path, "p%s" % i)
+ for i in range(5)]
+
+ one_party = parties[0]
+
+ eq_(list(one_party), [])
+ eq_(len(one_party), 0)
+
+ participants = set()
+ for party in parties:
+ party.join()
+ participants.add(party.data.decode('utf-8'))
+
+ eq_(set(party), participants)
+ eq_(len(party), len(participants))
+
+ for party in parties:
+ party.leave()
+ participants.remove(party.data.decode('utf-8'))
+
+ eq_(set(party), participants)
+ eq_(len(party), len(participants))
diff --git a/slider-agent/src/main/python/kazoo/tests/test_paths.py b/slider-agent/src/main/python/kazoo/tests/test_paths.py
new file mode 100644
index 0000000..c9064bb
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/tests/test_paths.py
@@ -0,0 +1,99 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+import sys
+from unittest import TestCase
+
+from kazoo.protocol import paths
+
+
+if sys.version_info > (3, ): # pragma: nocover
+ def u(s):
+ return s
+else: # pragma: nocover
+ def u(s):
+ return unicode(s, "unicode_escape")
+
+
+class NormPathTestCase(TestCase):
+
+ def test_normpath(self):
+ self.assertEqual(paths.normpath('/a/b'), '/a/b')
+
+ def test_normpath_empty(self):
+ self.assertEqual(paths.normpath(''), '')
+
+ def test_normpath_unicode(self):
+ self.assertEqual(paths.normpath(u('/\xe4/b')), u('/\xe4/b'))
+
+ def test_normpath_dots(self):
+ self.assertEqual(paths.normpath('/a./b../c'), '/a./b../c')
+
+ def test_normpath_slash(self):
+ self.assertEqual(paths.normpath('/'), '/')
+
+ def test_normpath_multiple_slashes(self):
+ self.assertEqual(paths.normpath('//'), '/')
+ self.assertEqual(paths.normpath('//a/b'), '/a/b')
+ self.assertEqual(paths.normpath('/a//b//'), '/a/b')
+ self.assertEqual(paths.normpath('//a////b///c/'), '/a/b/c')
+
+ def test_normpath_relative(self):
+ self.assertRaises(ValueError, paths.normpath, './a/b')
+ self.assertRaises(ValueError, paths.normpath, '/a/../b')
+
+
+class JoinTestCase(TestCase):
+
+ def test_join(self):
+ self.assertEqual(paths.join('/a'), '/a')
+ self.assertEqual(paths.join('/a', 'b/'), '/a/b/')
+ self.assertEqual(paths.join('/a', 'b', 'c'), '/a/b/c')
+
+ def test_join_empty(self):
+ self.assertEqual(paths.join(''), '')
+ self.assertEqual(paths.join('', 'a', 'b'), 'a/b')
+ self.assertEqual(paths.join('/a', '', 'b/', 'c'), '/a/b/c')
+
+ def test_join_absolute(self):
+ self.assertEqual(paths.join('/a/b', '/c'), '/c')
+
+
+class IsAbsTestCase(TestCase):
+
+ def test_isabs(self):
+ self.assertTrue(paths.isabs('/'))
+ self.assertTrue(paths.isabs('/a'))
+ self.assertTrue(paths.isabs('/a//b/c'))
+ self.assertTrue(paths.isabs('//a/b'))
+
+ def test_isabs_false(self):
+ self.assertFalse(paths.isabs(''))
+ self.assertFalse(paths.isabs('a/'))
+ self.assertFalse(paths.isabs('a/../'))
+
+
+class BaseNameTestCase(TestCase):
+
+ def test_basename(self):
+ self.assertEquals(paths.basename(''), '')
+ self.assertEquals(paths.basename('/'), '')
+ self.assertEquals(paths.basename('//a'), 'a')
+ self.assertEquals(paths.basename('//a/'), '')
+ self.assertEquals(paths.basename('/a/b.//c..'), 'c..')
+
+
+class PrefixRootTestCase(TestCase):
+
+ def test_prefix_root(self):
+ self.assertEquals(paths._prefix_root('/a/', 'b/c'), '/a/b/c')
+ self.assertEquals(paths._prefix_root('/a/b', 'c/d'), '/a/b/c/d')
+ self.assertEquals(paths._prefix_root('/a', '/b/c'), '/a/b/c')
+ self.assertEquals(paths._prefix_root('/a', '//b/c.'), '/a/b/c.')
+
+
+class NormRootTestCase(TestCase):
+
+ def test_norm_root(self):
+ self.assertEquals(paths._norm_root(''), '/')
+ self.assertEquals(paths._norm_root('/'), '/')
+ self.assertEquals(paths._norm_root('//a'), '/a')
+ self.assertEquals(paths._norm_root('//a./b'), '/a./b')
diff --git a/slider-agent/src/main/python/kazoo/tests/test_queue.py b/slider-agent/src/main/python/kazoo/tests/test_queue.py
new file mode 100644
index 0000000..4c13ca9
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/tests/test_queue.py
@@ -0,0 +1,180 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+import uuid
+
+from nose import SkipTest
+from nose.tools import eq_, ok_
+
+from kazoo.testing import KazooTestCase
+from kazoo.tests.util import TRAVIS_ZK_VERSION
+
+
+class KazooQueueTests(KazooTestCase):
+
+ def _makeOne(self):
+ path = "/" + uuid.uuid4().hex
+ return self.client.Queue(path)
+
+ def test_queue_validation(self):
+ queue = self._makeOne()
+ self.assertRaises(TypeError, queue.put, {})
+ self.assertRaises(TypeError, queue.put, b"one", b"100")
+ self.assertRaises(TypeError, queue.put, b"one", 10.0)
+ self.assertRaises(ValueError, queue.put, b"one", -100)
+ self.assertRaises(ValueError, queue.put, b"one", 100000)
+
+ def test_empty_queue(self):
+ queue = self._makeOne()
+ eq_(len(queue), 0)
+ self.assertTrue(queue.get() is None)
+ eq_(len(queue), 0)
+
+ def test_queue(self):
+ queue = self._makeOne()
+ queue.put(b"one")
+ queue.put(b"two")
+ queue.put(b"three")
+ eq_(len(queue), 3)
+
+ eq_(queue.get(), b"one")
+ eq_(queue.get(), b"two")
+ eq_(queue.get(), b"three")
+ eq_(len(queue), 0)
+
+ def test_priority(self):
+ queue = self._makeOne()
+ queue.put(b"four", priority=101)
+ queue.put(b"one", priority=0)
+ queue.put(b"two", priority=0)
+ queue.put(b"three", priority=10)
+
+ eq_(queue.get(), b"one")
+ eq_(queue.get(), b"two")
+ eq_(queue.get(), b"three")
+ eq_(queue.get(), b"four")
+
+
+class KazooLockingQueueTests(KazooTestCase):
+
+ def setUp(self):
+ KazooTestCase.setUp(self)
+ skip = False
+ if TRAVIS_ZK_VERSION and TRAVIS_ZK_VERSION < (3, 4):
+ skip = True
+ elif TRAVIS_ZK_VERSION and TRAVIS_ZK_VERSION >= (3, 4):
+ skip = False
+ else:
+ ver = self.client.server_version()
+ if ver[1] < 4:
+ skip = True
+ if skip:
+ raise SkipTest("Must use Zookeeper 3.4 or above")
+
+ def _makeOne(self):
+ path = "/" + uuid.uuid4().hex
+ return self.client.LockingQueue(path)
+
+ def test_queue_validation(self):
+ queue = self._makeOne()
+ self.assertRaises(TypeError, queue.put, {})
+ self.assertRaises(TypeError, queue.put, b"one", b"100")
+ self.assertRaises(TypeError, queue.put, b"one", 10.0)
+ self.assertRaises(ValueError, queue.put, b"one", -100)
+ self.assertRaises(ValueError, queue.put, b"one", 100000)
+ self.assertRaises(TypeError, queue.put_all, {})
+ self.assertRaises(TypeError, queue.put_all, [{}])
+ self.assertRaises(TypeError, queue.put_all, [b"one"], b"100")
+ self.assertRaises(TypeError, queue.put_all, [b"one"], 10.0)
+ self.assertRaises(ValueError, queue.put_all, [b"one"], -100)
+ self.assertRaises(ValueError, queue.put_all, [b"one"], 100000)
+
+ def test_empty_queue(self):
+ queue = self._makeOne()
+ eq_(len(queue), 0)
+ self.assertTrue(queue.get(0) is None)
+ eq_(len(queue), 0)
+
+ def test_queue(self):
+ queue = self._makeOne()
+ queue.put(b"one")
+ queue.put_all([b"two", b"three"])
+ eq_(len(queue), 3)
+
+ ok_(not queue.consume())
+ ok_(not queue.holds_lock())
+ eq_(queue.get(1), b"one")
+ ok_(queue.holds_lock())
+ # Without consuming, should return the same element
+ eq_(queue.get(1), b"one")
+ ok_(queue.consume())
+ ok_(not queue.holds_lock())
+ eq_(queue.get(1), b"two")
+ ok_(queue.holds_lock())
+ ok_(queue.consume())
+ ok_(not queue.holds_lock())
+ eq_(queue.get(1), b"three")
+ ok_(queue.holds_lock())
+ ok_(queue.consume())
+ ok_(not queue.holds_lock())
+ ok_(not queue.consume())
+ eq_(len(queue), 0)
+
+ def test_consume(self):
+ queue = self._makeOne()
+
+ queue.put(b"one")
+ ok_(not queue.consume())
+ queue.get(.1)
+ ok_(queue.consume())
+ ok_(not queue.consume())
+
+ def test_holds_lock(self):
+ queue = self._makeOne()
+
+ ok_(not queue.holds_lock())
+ queue.put(b"one")
+ queue.get(.1)
+ ok_(queue.holds_lock())
+ queue.consume()
+ ok_(not queue.holds_lock())
+
+ def test_priority(self):
+ queue = self._makeOne()
+ queue.put(b"four", priority=101)
+ queue.put(b"one", priority=0)
+ queue.put(b"two", priority=0)
+ queue.put(b"three", priority=10)
+
+ eq_(queue.get(1), b"one")
+ ok_(queue.consume())
+ eq_(queue.get(1), b"two")
+ ok_(queue.consume())
+ eq_(queue.get(1), b"three")
+ ok_(queue.consume())
+ eq_(queue.get(1), b"four")
+ ok_(queue.consume())
+
+ def test_concurrent_execution(self):
+ queue = self._makeOne()
+ value1 = []
+ value2 = []
+ value3 = []
+ event1 = self.client.handler.event_object()
+ event2 = self.client.handler.event_object()
+ event3 = self.client.handler.event_object()
+
+ def get_concurrently(value, event):
+ q = self.client.LockingQueue(queue.path)
+ value.append(q.get(.1))
+ event.set()
+
+ self.client.handler.spawn(get_concurrently, value1, event1)
+ self.client.handler.spawn(get_concurrently, value2, event2)
+ self.client.handler.spawn(get_concurrently, value3, event3)
+ queue.put(b"one")
+ event1.wait(.2)
+ event2.wait(.2)
+ event3.wait(.2)
+
+ result = value1 + value2 + value3
+ eq_(result.count(b"one"), 1)
+ eq_(result.count(None), 2)
diff --git a/slider-agent/src/main/python/kazoo/tests/test_retry.py b/slider-agent/src/main/python/kazoo/tests/test_retry.py
new file mode 100644
index 0000000..84c8d41
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/tests/test_retry.py
@@ -0,0 +1,78 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+import unittest
+
+from nose.tools import eq_
+
+
+class TestRetrySleeper(unittest.TestCase):
+
+ def _pass(self):
+ pass
+
+ def _fail(self, times=1):
+ from kazoo.retry import ForceRetryError
+ scope = dict(times=0)
+
+ def inner():
+ if scope['times'] >= times:
+ pass
+ else:
+ scope['times'] += 1
+ raise ForceRetryError('Failed!')
+ return inner
+
+ def _makeOne(self, *args, **kwargs):
+ from kazoo.retry import KazooRetry
+ return KazooRetry(*args, **kwargs)
+
+ def test_reset(self):
+ retry = self._makeOne(delay=0, max_tries=2)
+ retry(self._fail())
+ eq_(retry._attempts, 1)
+ retry.reset()
+ eq_(retry._attempts, 0)
+
+ def test_too_many_tries(self):
+ from kazoo.retry import RetryFailedError
+ retry = self._makeOne(delay=0)
+ self.assertRaises(RetryFailedError, retry, self._fail(times=999))
+ eq_(retry._attempts, 1)
+
+ def test_maximum_delay(self):
+ def sleep_func(_time):
+ pass
+
+ retry = self._makeOne(delay=10, max_tries=100, sleep_func=sleep_func)
+ retry(self._fail(times=10))
+ self.assertTrue(retry._cur_delay < 4000, retry._cur_delay)
+ # gevent's sleep function is picky about the type
+ eq_(type(retry._cur_delay), float)
+
+ def test_copy(self):
+ _sleep = lambda t: None
+ retry = self._makeOne(sleep_func=_sleep)
+ rcopy = retry.copy()
+ self.assertTrue(rcopy.sleep_func is _sleep)
+
+
+class TestKazooRetry(unittest.TestCase):
+
+ def _makeOne(self, **kw):
+ from kazoo.retry import KazooRetry
+ return KazooRetry(**kw)
+
+ def test_connection_closed(self):
+ from kazoo.exceptions import ConnectionClosedError
+ retry = self._makeOne()
+
+ def testit():
+ raise ConnectionClosedError()
+ self.assertRaises(ConnectionClosedError, retry, testit)
+
+ def test_session_expired(self):
+ from kazoo.exceptions import SessionExpiredError
+ retry = self._makeOne(max_tries=1)
+
+ def testit():
+ raise SessionExpiredError()
+ self.assertRaises(Exception, retry, testit)
diff --git a/slider-agent/src/main/python/kazoo/tests/test_security.py b/slider-agent/src/main/python/kazoo/tests/test_security.py
new file mode 100644
index 0000000..587c265
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/tests/test_security.py
@@ -0,0 +1,41 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+import unittest
+
+from nose.tools import eq_
+from kazoo.security import Permissions
+
+
+class TestACL(unittest.TestCase):
+ def _makeOne(self, *args, **kwargs):
+ from kazoo.security import make_acl
+ return make_acl(*args, **kwargs)
+
+ def test_read_acl(self):
+ acl = self._makeOne("digest", ":", read=True)
+ eq_(acl.perms & Permissions.READ, Permissions.READ)
+
+ def test_all_perms(self):
+ acl = self._makeOne("digest", ":", read=True, write=True,
+ create=True, delete=True, admin=True)
+ for perm in [Permissions.READ, Permissions.CREATE, Permissions.WRITE,
+ Permissions.DELETE, Permissions.ADMIN]:
+ eq_(acl.perms & perm, perm)
+
+ def test_perm_listing(self):
+ from kazoo.security import ACL
+ f = ACL(15, 'fred')
+ self.assert_('READ' in f.acl_list)
+ self.assert_('WRITE' in f.acl_list)
+ self.assert_('CREATE' in f.acl_list)
+ self.assert_('DELETE' in f.acl_list)
+
+ f = ACL(16, 'fred')
+ self.assert_('ADMIN' in f.acl_list)
+
+ f = ACL(31, 'george')
+ self.assert_('ALL' in f.acl_list)
+
+ def test_perm_repr(self):
+ from kazoo.security import ACL
+ f = ACL(16, 'fred')
+ self.assert_("ACL(perms=16, acl_list=['ADMIN']" in repr(f))
diff --git a/slider-agent/src/main/python/kazoo/tests/test_threading_handler.py b/slider-agent/src/main/python/kazoo/tests/test_threading_handler.py
new file mode 100644
index 0000000..4de5781
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/tests/test_threading_handler.py
@@ -0,0 +1,327 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+import threading
+import unittest
+
+import mock
+from nose.tools import assert_raises
+from nose.tools import eq_
+from nose.tools import raises
+
+
+class TestThreadingHandler(unittest.TestCase):
+ def _makeOne(self, *args):
+ from kazoo.handlers.threading import SequentialThreadingHandler
+ return SequentialThreadingHandler(*args)
+
+ def _getAsync(self, *args):
+ from kazoo.handlers.threading import AsyncResult
+ return AsyncResult
+
+ def test_proper_threading(self):
+ h = self._makeOne()
+ h.start()
+        # In Python 3.3, threading._Event is gone; before 3.3, threading.Event is a factory function, not a class
+ event_class = getattr(threading, '_Event', threading.Event)
+ assert isinstance(h.event_object(), event_class)
+
+ def test_matching_async(self):
+ h = self._makeOne()
+ h.start()
+ async = self._getAsync()
+ assert isinstance(h.async_result(), async)
+
+ def test_exception_raising(self):
+ h = self._makeOne()
+
+ @raises(h.timeout_exception)
+ def testit():
+ raise h.timeout_exception("This is a timeout")
+ testit()
+
+ def test_double_start_stop(self):
+ h = self._makeOne()
+ h.start()
+ self.assertTrue(h._running)
+ h.start()
+ h.stop()
+ h.stop()
+ self.assertFalse(h._running)
+
+
+class TestThreadingAsync(unittest.TestCase):
+ def _makeOne(self, *args):
+ from kazoo.handlers.threading import AsyncResult
+ return AsyncResult(*args)
+
+ def _makeHandler(self):
+ from kazoo.handlers.threading import SequentialThreadingHandler
+ return SequentialThreadingHandler()
+
+ def test_ready(self):
+ mock_handler = mock.Mock()
+ async = self._makeOne(mock_handler)
+
+ eq_(async.ready(), False)
+ async.set('val')
+ eq_(async.ready(), True)
+ eq_(async.successful(), True)
+ eq_(async.exception, None)
+
+ def test_callback_queued(self):
+ mock_handler = mock.Mock()
+ mock_handler.completion_queue = mock.Mock()
+ async = self._makeOne(mock_handler)
+
+ async.rawlink(lambda a: a)
+ async.set('val')
+
+ assert mock_handler.completion_queue.put.called
+
+ def test_set_exception(self):
+ mock_handler = mock.Mock()
+ mock_handler.completion_queue = mock.Mock()
+ async = self._makeOne(mock_handler)
+ async.rawlink(lambda a: a)
+ async.set_exception(ImportError('Error occured'))
+
+ assert isinstance(async.exception, ImportError)
+ assert mock_handler.completion_queue.put.called
+
+ def test_get_wait_while_setting(self):
+ mock_handler = mock.Mock()
+ async = self._makeOne(mock_handler)
+
+ lst = []
+ bv = threading.Event()
+ cv = threading.Event()
+
+ def wait_for_val():
+ bv.set()
+ val = async.get()
+ lst.append(val)
+ cv.set()
+ th = threading.Thread(target=wait_for_val)
+ th.start()
+ bv.wait()
+
+ async.set('fred')
+ cv.wait()
+ eq_(lst, ['fred'])
+ th.join()
+
+ def test_get_with_nowait(self):
+ mock_handler = mock.Mock()
+ async = self._makeOne(mock_handler)
+ timeout = self._makeHandler().timeout_exception
+
+ @raises(timeout)
+ def test_it():
+ async.get(block=False)
+ test_it()
+
+ @raises(timeout)
+ def test_nowait():
+ async.get_nowait()
+ test_nowait()
+
+ def test_get_with_exception(self):
+ mock_handler = mock.Mock()
+ async = self._makeOne(mock_handler)
+
+ lst = []
+ bv = threading.Event()
+ cv = threading.Event()
+
+ def wait_for_val():
+ bv.set()
+ try:
+ val = async.get()
+ except ImportError:
+ lst.append('oops')
+ else:
+ lst.append(val)
+ cv.set()
+ th = threading.Thread(target=wait_for_val)
+ th.start()
+ bv.wait()
+
+ async.set_exception(ImportError)
+ cv.wait()
+ eq_(lst, ['oops'])
+ th.join()
+
+ def test_wait(self):
+ mock_handler = mock.Mock()
+ async = self._makeOne(mock_handler)
+
+ lst = []
+ bv = threading.Event()
+ cv = threading.Event()
+
+ def wait_for_val():
+ bv.set()
+ try:
+ val = async.wait(10)
+ except ImportError:
+ lst.append('oops')
+ else:
+ lst.append(val)
+ cv.set()
+ th = threading.Thread(target=wait_for_val)
+ th.start()
+ bv.wait(10)
+
+ async.set("fred")
+ cv.wait(15)
+ eq_(lst, [True])
+ th.join()
+
+ def test_set_before_wait(self):
+ mock_handler = mock.Mock()
+ async = self._makeOne(mock_handler)
+
+ lst = []
+ cv = threading.Event()
+ async.set('fred')
+
+ def wait_for_val():
+ val = async.get()
+ lst.append(val)
+ cv.set()
+ th = threading.Thread(target=wait_for_val)
+ th.start()
+ cv.wait()
+ eq_(lst, ['fred'])
+ th.join()
+
+ def test_set_exc_before_wait(self):
+ mock_handler = mock.Mock()
+ async = self._makeOne(mock_handler)
+
+ lst = []
+ cv = threading.Event()
+ async.set_exception(ImportError)
+
+ def wait_for_val():
+ try:
+ val = async.get()
+ except ImportError:
+ lst.append('ooops')
+ else:
+ lst.append(val)
+ cv.set()
+ th = threading.Thread(target=wait_for_val)
+ th.start()
+ cv.wait()
+ eq_(lst, ['ooops'])
+ th.join()
+
+ def test_linkage(self):
+ mock_handler = mock.Mock()
+ async = self._makeOne(mock_handler)
+ cv = threading.Event()
+
+ lst = []
+
+ def add_on():
+ lst.append(True)
+
+ def wait_for_val():
+ async.get()
+ cv.set()
+
+ th = threading.Thread(target=wait_for_val)
+ th.start()
+
+ async.rawlink(add_on)
+ async.set('fred')
+ assert mock_handler.completion_queue.put.called
+ async.unlink(add_on)
+ cv.wait()
+ eq_(async.value, 'fred')
+ th.join()
+
+ def test_linkage_not_ready(self):
+ mock_handler = mock.Mock()
+ async = self._makeOne(mock_handler)
+
+ lst = []
+
+ def add_on():
+ lst.append(True)
+
+ async.set('fred')
+ assert not mock_handler.completion_queue.called
+ async.rawlink(add_on)
+ assert mock_handler.completion_queue.put.called
+
+ def test_link_and_unlink(self):
+ mock_handler = mock.Mock()
+ async = self._makeOne(mock_handler)
+
+ lst = []
+
+ def add_on():
+ lst.append(True)
+
+ async.rawlink(add_on)
+ assert not mock_handler.completion_queue.put.called
+ async.unlink(add_on)
+ async.set('fred')
+ assert not mock_handler.completion_queue.put.called
+
+ def test_captured_exception(self):
+ from kazoo.handlers.utils import capture_exceptions
+
+ mock_handler = mock.Mock()
+ async = self._makeOne(mock_handler)
+
+ @capture_exceptions(async)
+ def exceptional_function():
+ return 1/0
+
+ exceptional_function()
+
+ assert_raises(ZeroDivisionError, async.get)
+
+ def test_no_capture_exceptions(self):
+ from kazoo.handlers.utils import capture_exceptions
+
+ mock_handler = mock.Mock()
+ async = self._makeOne(mock_handler)
+
+ lst = []
+
+ def add_on():
+ lst.append(True)
+
+ async.rawlink(add_on)
+
+ @capture_exceptions(async)
+ def regular_function():
+ return True
+
+ regular_function()
+
+ assert not mock_handler.completion_queue.put.called
+
+ def test_wraps(self):
+ from kazoo.handlers.utils import wrap
+
+ mock_handler = mock.Mock()
+ async = self._makeOne(mock_handler)
+
+ lst = []
+
+ def add_on(result):
+ lst.append(result.get())
+
+ async.rawlink(add_on)
+
+ @wrap(async)
+ def regular_function():
+ return 'hello'
+
+ assert regular_function() == 'hello'
+ assert mock_handler.completion_queue.put.called
+ assert async.get() == 'hello'
diff --git a/slider-agent/src/main/python/kazoo/tests/test_watchers.py b/slider-agent/src/main/python/kazoo/tests/test_watchers.py
new file mode 100644
index 0000000..44795c4
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/tests/test_watchers.py
@@ -0,0 +1,490 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+import time
+import threading
+import uuid
+
+from nose.tools import eq_
+from nose.tools import raises
+
+from kazoo.exceptions import KazooException
+from kazoo.protocol.states import EventType
+from kazoo.testing import KazooTestCase
+
+
+class KazooDataWatcherTests(KazooTestCase):
+ def setUp(self):
+ super(KazooDataWatcherTests, self).setUp()
+ self.path = "/" + uuid.uuid4().hex
+ self.client.ensure_path(self.path)
+
+ def test_data_watcher(self):
+ update = threading.Event()
+ data = [True]
+
+ # Make it a non-existent path
+ self.path += 'f'
+
+ @self.client.DataWatch(self.path)
+ def changed(d, stat):
+ data.pop()
+ data.append(d)
+ update.set()
+
+ update.wait(10)
+ eq_(data, [None])
+ update.clear()
+
+ self.client.create(self.path, b'fred')
+ update.wait(10)
+ eq_(data[0], b'fred')
+ update.clear()
+
+ def test_data_watcher_once(self):
+ update = threading.Event()
+ data = [True]
+
+ # Make it a non-existent path
+ self.path += 'f'
+
+ dwatcher = self.client.DataWatch(self.path)
+
+ @dwatcher
+ def changed(d, stat):
+ data.pop()
+ data.append(d)
+ update.set()
+
+ update.wait(10)
+ eq_(data, [None])
+ update.clear()
+
+ @raises(KazooException)
+ def test_it():
+ @dwatcher
+ def func(d, stat):
+ data.pop()
+ test_it()
+
+ def test_data_watcher_with_event(self):
+ # Test that the data watcher gets passed the event, if it
+ # accepts three arguments
+ update = threading.Event()
+ data = [True]
+
+ # Make it a non-existent path
+ self.path += 'f'
+
+ @self.client.DataWatch(self.path)
+ def changed(d, stat, event):
+ data.pop()
+ data.append(event)
+ update.set()
+
+ update.wait(10)
+ eq_(data, [None])
+ update.clear()
+
+ self.client.create(self.path, b'fred')
+ update.wait(10)
+ eq_(data[0].type, EventType.CREATED)
+ update.clear()
+
+ def test_func_style_data_watch(self):
+ update = threading.Event()
+ data = [True]
+
+ # Make it a non-existent path
+ path = self.path + 'f'
+
+ def changed(d, stat):
+ data.pop()
+ data.append(d)
+ update.set()
+ self.client.DataWatch(path, changed)
+
+ update.wait(10)
+ eq_(data, [None])
+ update.clear()
+
+ self.client.create(path, b'fred')
+ update.wait(10)
+ eq_(data[0], b'fred')
+ update.clear()
+
+ def test_datawatch_across_session_expire(self):
+ update = threading.Event()
+ data = [True]
+
+ @self.client.DataWatch(self.path)
+ def changed(d, stat):
+ data.pop()
+ data.append(d)
+ update.set()
+
+ update.wait(10)
+ eq_(data, [b""])
+ update.clear()
+
+ self.expire_session()
+ self.client.retry(self.client.set, self.path, b'fred')
+ update.wait(25)
+ eq_(data[0], b'fred')
+
+ def test_func_stops(self):
+ update = threading.Event()
+ data = [True]
+
+ self.path += "f"
+
+ fail_through = []
+
+ @self.client.DataWatch(self.path)
+ def changed(d, stat):
+ data.pop()
+ data.append(d)
+ update.set()
+ if fail_through:
+ return False
+
+ update.wait(10)
+ eq_(data, [None])
+ update.clear()
+
+ fail_through.append(True)
+ self.client.create(self.path, b'fred')
+ update.wait(10)
+ eq_(data[0], b'fred')
+ update.clear()
+
+ self.client.set(self.path, b'asdfasdf')
+ update.wait(0.2)
+ eq_(data[0], b'fred')
+
+ d, stat = self.client.get(self.path)
+ eq_(d, b'asdfasdf')
+
+ def test_no_such_node(self):
+ args = []
+
+ @self.client.DataWatch("/some/path")
+ def changed(d, stat):
+ args.extend([d, stat])
+
+ eq_(args, [None, None])
+
+ def test_bad_watch_func2(self):
+ counter = 0
+
+ @self.client.DataWatch(self.path)
+ def changed(d, stat):
+ if counter > 0:
+ raise Exception("oops")
+
+ raises(Exception)(changed)
+
+ counter += 1
+ self.client.set(self.path, b'asdfasdf')
+
+ def test_watcher_evaluating_to_false(self):
+ class WeirdWatcher(list):
+ def __call__(self, *args):
+ self.called = True
+ watcher = WeirdWatcher()
+ self.client.DataWatch(self.path, watcher)
+ self.client.set(self.path, b'mwahaha')
+ self.assertTrue(watcher.called)
+
+ def test_watcher_repeat_delete(self):
+ a = []
+ ev = threading.Event()
+
+ self.client.delete(self.path)
+
+ @self.client.DataWatch(self.path)
+ def changed(val, stat):
+ a.append(val)
+ ev.set()
+
+ eq_(a, [None])
+ ev.wait(10)
+ ev.clear()
+ self.client.create(self.path, b'blah')
+ ev.wait(10)
+ eq_(ev.is_set(), True)
+ ev.clear()
+ eq_(a, [None, b'blah'])
+ self.client.delete(self.path)
+ ev.wait(10)
+ eq_(ev.is_set(), True)
+ ev.clear()
+ eq_(a, [None, b'blah', None])
+ self.client.create(self.path, b'blah')
+ ev.wait(10)
+ eq_(ev.is_set(), True)
+ ev.clear()
+ eq_(a, [None, b'blah', None, b'blah'])
+
+ def test_watcher_with_closing(self):
+ a = []
+ ev = threading.Event()
+
+ self.client.delete(self.path)
+
+ @self.client.DataWatch(self.path)
+ def changed(val, stat):
+ a.append(val)
+ ev.set()
+ eq_(a, [None])
+
+ b = False
+ try:
+ self.client.stop()
+ except:
+ b = True
+ eq_(b, False)
+
+
+class KazooChildrenWatcherTests(KazooTestCase):
+ def setUp(self):
+ super(KazooChildrenWatcherTests, self).setUp()
+ self.path = "/" + uuid.uuid4().hex
+ self.client.ensure_path(self.path)
+
+ def test_child_watcher(self):
+ update = threading.Event()
+ all_children = ['fred']
+
+ @self.client.ChildrenWatch(self.path)
+ def changed(children):
+ while all_children:
+ all_children.pop()
+ all_children.extend(children)
+ update.set()
+
+ update.wait(10)
+ eq_(all_children, [])
+ update.clear()
+
+ self.client.create(self.path + '/' + 'smith')
+ update.wait(10)
+ eq_(all_children, ['smith'])
+ update.clear()
+
+ self.client.create(self.path + '/' + 'george')
+ update.wait(10)
+ eq_(sorted(all_children), ['george', 'smith'])
+
+ def test_child_watcher_once(self):
+ update = threading.Event()
+ all_children = ['fred']
+
+ cwatch = self.client.ChildrenWatch(self.path)
+
+ @cwatch
+ def changed(children):
+ while all_children:
+ all_children.pop()
+ all_children.extend(children)
+ update.set()
+
+ update.wait(10)
+ eq_(all_children, [])
+ update.clear()
+
+ @raises(KazooException)
+ def test_it():
+ @cwatch
+ def changed_again(children):
+ update.set()
+ test_it()
+
+ def test_child_watcher_with_event(self):
+ update = threading.Event()
+ events = [True]
+
+ @self.client.ChildrenWatch(self.path, send_event=True)
+ def changed(children, event):
+ events.pop()
+ events.append(event)
+ update.set()
+
+ update.wait(10)
+ eq_(events, [None])
+ update.clear()
+
+ self.client.create(self.path + '/' + 'smith')
+ update.wait(10)
+ eq_(events[0].type, EventType.CHILD)
+ update.clear()
+
+ def test_func_style_child_watcher(self):
+ update = threading.Event()
+ all_children = ['fred']
+
+ def changed(children):
+ while all_children:
+ all_children.pop()
+ all_children.extend(children)
+ update.set()
+
+ self.client.ChildrenWatch(self.path, changed)
+
+ update.wait(10)
+ eq_(all_children, [])
+ update.clear()
+
+ self.client.create(self.path + '/' + 'smith')
+ update.wait(10)
+ eq_(all_children, ['smith'])
+ update.clear()
+
+ self.client.create(self.path + '/' + 'george')
+ update.wait(10)
+ eq_(sorted(all_children), ['george', 'smith'])
+
+ def test_func_stops(self):
+ update = threading.Event()
+ all_children = ['fred']
+
+ fail_through = []
+
+ @self.client.ChildrenWatch(self.path)
+ def changed(children):
+ while all_children:
+ all_children.pop()
+ all_children.extend(children)
+ update.set()
+ if fail_through:
+ return False
+
+ update.wait(10)
+ eq_(all_children, [])
+ update.clear()
+
+ fail_through.append(True)
+ self.client.create(self.path + '/' + 'smith')
+ update.wait(10)
+ eq_(all_children, ['smith'])
+ update.clear()
+
+ self.client.create(self.path + '/' + 'george')
+ update.wait(0.5)
+ eq_(all_children, ['smith'])
+
+ def test_child_watch_session_loss(self):
+ update = threading.Event()
+ all_children = ['fred']
+
+ @self.client.ChildrenWatch(self.path)
+ def changed(children):
+ while all_children:
+ all_children.pop()
+ all_children.extend(children)
+ update.set()
+
+ update.wait(10)
+ eq_(all_children, [])
+ update.clear()
+
+ self.client.create(self.path + '/' + 'smith')
+ update.wait(10)
+ eq_(all_children, ['smith'])
+ update.clear()
+ self.expire_session()
+
+ self.client.retry(self.client.create,
+ self.path + '/' + 'george')
+ update.wait(20)
+ eq_(sorted(all_children), ['george', 'smith'])
+
+ def test_child_stop_on_session_loss(self):
+ update = threading.Event()
+ all_children = ['fred']
+
+ @self.client.ChildrenWatch(self.path, allow_session_lost=False)
+ def changed(children):
+ while all_children:
+ all_children.pop()
+ all_children.extend(children)
+ update.set()
+
+ update.wait(10)
+ eq_(all_children, [])
+ update.clear()
+
+ self.client.create(self.path + '/' + 'smith')
+ update.wait(10)
+ eq_(all_children, ['smith'])
+ update.clear()
+ self.expire_session()
+
+ self.client.retry(self.client.create,
+ self.path + '/' + 'george')
+ update.wait(4)
+ eq_(update.is_set(), False)
+ eq_(all_children, ['smith'])
+
+ children = self.client.get_children(self.path)
+ eq_(sorted(children), ['george', 'smith'])
+
+ def test_bad_children_watch_func(self):
+ counter = 0
+
+ @self.client.ChildrenWatch(self.path)
+ def changed(children):
+ if counter > 0:
+ raise Exception("oops")
+
+ raises(Exception)(changed)
+ counter += 1
+ self.client.create(self.path + '/' + 'smith')
+
+
+class KazooPatientChildrenWatcherTests(KazooTestCase):
+ def setUp(self):
+ super(KazooPatientChildrenWatcherTests, self).setUp()
+ self.path = "/" + uuid.uuid4().hex
+
+ def _makeOne(self, *args, **kwargs):
+ from kazoo.recipe.watchers import PatientChildrenWatch
+ return PatientChildrenWatch(*args, **kwargs)
+
+ def test_watch(self):
+ self.client.ensure_path(self.path)
+ watcher = self._makeOne(self.client, self.path, 0.1)
+ result = watcher.start()
+ children, asy = result.get()
+ eq_(len(children), 0)
+ eq_(asy.ready(), False)
+
+ self.client.create(self.path + '/' + 'fred')
+ asy.get(timeout=1)
+ eq_(asy.ready(), True)
+
+ def test_exception(self):
+ from kazoo.exceptions import NoNodeError
+ watcher = self._makeOne(self.client, self.path, 0.1)
+ result = watcher.start()
+
+ @raises(NoNodeError)
+ def testit():
+ result.get()
+ testit()
+
+ def test_watch_iterations(self):
+ self.client.ensure_path(self.path)
+ watcher = self._makeOne(self.client, self.path, 0.5)
+ result = watcher.start()
+ eq_(result.ready(), False)
+
+ time.sleep(0.08)
+ self.client.create(self.path + '/' + uuid.uuid4().hex)
+ eq_(result.ready(), False)
+ time.sleep(0.08)
+ eq_(result.ready(), False)
+ self.client.create(self.path + '/' + uuid.uuid4().hex)
+ time.sleep(0.08)
+ eq_(result.ready(), False)
+
+ children, asy = result.get()
+ eq_(len(children), 2)
diff --git a/slider-agent/src/main/python/kazoo/tests/util.py b/slider-agent/src/main/python/kazoo/tests/util.py
new file mode 100644
index 0000000..906cbc0
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/tests/util.py
@@ -0,0 +1,127 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+##############################################################################
+#
+# Copyright Zope Foundation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE.
+#
+##############################################################################
+
+import logging
+import os
+import time
+
+TRAVIS = os.environ.get('TRAVIS', False)
+TRAVIS_ZK_VERSION = TRAVIS and os.environ.get('ZOOKEEPER_VERSION', None)
+if TRAVIS_ZK_VERSION:
+ TRAVIS_ZK_VERSION = tuple([int(n) for n in TRAVIS_ZK_VERSION.split('.')])
+
+
+class Handler(logging.Handler):
+
+ def __init__(self, *names, **kw):
+ logging.Handler.__init__(self)
+ self.names = names
+ self.records = []
+ self.setLoggerLevel(**kw)
+
+ def setLoggerLevel(self, level=1):
+ self.level = level
+ self.oldlevels = {}
+
+ def emit(self, record):
+ self.records.append(record)
+
+ def clear(self):
+ del self.records[:]
+
+ def install(self):
+ for name in self.names:
+ logger = logging.getLogger(name)
+ self.oldlevels[name] = logger.level
+ logger.setLevel(self.level)
+ logger.addHandler(self)
+
+ def uninstall(self):
+ for name in self.names:
+ logger = logging.getLogger(name)
+ logger.setLevel(self.oldlevels[name])
+ logger.removeHandler(self)
+
+ def __str__(self):
+ return '\n'.join(
+ [("%s %s\n %s" %
+ (record.name, record.levelname,
+ '\n'.join([line
+ for line in record.getMessage().split('\n')
+ if line.strip()])
+ )
+ )
+ for record in self.records]
+ )
+
+
+class InstalledHandler(Handler):
+
+ def __init__(self, *names, **kw):
+ Handler.__init__(self, *names, **kw)
+ self.install()
+
+
+class Wait(object):
+
+ class TimeOutWaitingFor(Exception):
+ "A test condition timed out"
+
+ timeout = 9
+ wait = .01
+
+ def __init__(self, timeout=None, wait=None, exception=None,
+ getnow=(lambda: time.time), getsleep=(lambda: time.sleep)):
+
+ if timeout is not None:
+ self.timeout = timeout
+
+ if wait is not None:
+ self.wait = wait
+
+ if exception is not None:
+ self.TimeOutWaitingFor = exception
+
+ self.getnow = getnow
+ self.getsleep = getsleep
+
+ def __call__(self, func=None, timeout=None, wait=None, message=None):
+ if func is None:
+ return lambda func: self(func, timeout, wait, message)
+
+ if func():
+ return
+
+ now = self.getnow()
+ sleep = self.getsleep()
+ if timeout is None:
+ timeout = self.timeout
+ if wait is None:
+ wait = self.wait
+ wait = float(wait)
+
+ deadline = now() + timeout
+ while 1:
+ sleep(wait)
+ if func():
+ return
+ if now() > deadline:
+ raise self.TimeOutWaitingFor(
+ message or
+ getattr(func, '__doc__') or
+ getattr(func, '__name__')
+ )
+
+wait = Wait()
diff --git a/slider-agent/src/main/python/resource_management/core/logger.py b/slider-agent/src/main/python/resource_management/core/logger.py
index 7370c97..b80042a 100644
--- a/slider-agent/src/main/python/resource_management/core/logger.py
+++ b/slider-agent/src/main/python/resource_management/core/logger.py
@@ -79,7 +79,10 @@
val = "[EMPTY]"
# correctly output 'mode' (as they are octal values like 0755)
elif y and x == 'mode':
- val = oct(y)
+ try:
+ val = oct(y)
+ except:
+ val = repr(y)
else:
val = repr(y)
diff --git a/slider-agent/src/main/python/resource_management/core/providers/__init__.py b/slider-agent/src/main/python/resource_management/core/providers/__init__.py
index 0c170e7..630183b 100644
--- a/slider-agent/src/main/python/resource_management/core/providers/__init__.py
+++ b/slider-agent/src/main/python/resource_management/core/providers/__init__.py
@@ -50,6 +50,11 @@
debian=dict(
Package="resource_management.core.providers.package.apt.AptProvider",
),
+ winsrv=dict(
+ Service="resource_management.core.providers.windows.service.ServiceProvider",
+ Execute="resource_management.core.providers.windows.system.ExecuteProvider",
+ File="resource_management.core.providers.windows.system.FileProvider"
+ ),
default=dict(
File="resource_management.core.providers.system.FileProvider",
Directory="resource_management.core.providers.system.DirectoryProvider",
diff --git a/slider-agent/src/main/python/resource_management/core/providers/accounts.py b/slider-agent/src/main/python/resource_management/core/providers/accounts.py
index 747f120..8711e45 100644
--- a/slider-agent/src/main/python/resource_management/core/providers/accounts.py
+++ b/slider-agent/src/main/python/resource_management/core/providers/accounts.py
@@ -22,8 +22,8 @@
from __future__ import with_statement
-import grp
-import pwd
+#import grp
+#import pwd
from resource_management.core import shell
from resource_management.core.providers import Provider
from resource_management.core.logger import Logger
@@ -70,10 +70,10 @@
@property
def user(self):
- try:
- return pwd.getpwnam(self.resource.username)
- except KeyError:
- return None
+ #try:
+ # return pwd.getpwnam(self.resource.username)
+ #except KeyError:
+ return None
class GroupProvider(Provider):
@@ -110,7 +110,7 @@
@property
def group(self):
- try:
- return grp.getgrnam(self.resource.group_name)
- except KeyError:
- return None
+ #try:
+ # return grp.getgrnam(self.resource.group_name)
+ #except KeyError:
+ return None
diff --git a/slider-agent/src/main/python/resource_management/core/providers/system.py b/slider-agent/src/main/python/resource_management/core/providers/system.py
index 3475d6a..6969c62 100644
--- a/slider-agent/src/main/python/resource_management/core/providers/system.py
+++ b/slider-agent/src/main/python/resource_management/core/providers/system.py
@@ -22,9 +22,8 @@
from __future__ import with_statement
-import grp
+import platform
import os
-import pwd
import time
import shutil
from resource_management.core import shell
@@ -33,14 +32,22 @@
from resource_management.core.providers import Provider
from resource_management.core.logger import Logger
+IS_WINDOWS = platform.system() == "Windows"
+
+if not IS_WINDOWS:
+ import grp
+ import pwd
def _coerce_uid(user):
try:
uid = int(user)
except ValueError:
- try:
- uid = pwd.getpwnam(user).pw_uid
- except KeyError:
+ if not IS_WINDOWS:
+ try:
+ uid = pwd.getpwnam(user).pw_uid
+ except KeyError:
+ raise Fail("User %s doesn't exist." % user)
+ else:
raise Fail("User %s doesn't exist." % user)
return uid
@@ -49,10 +56,13 @@
try:
gid = int(group)
except ValueError:
- try:
- gid = grp.getgrnam(group).gr_gid
- except KeyError:
- raise Fail("Group %s doesn't exist." % group)
+ if not IS_WINDOWS:
+ try:
+ gid = grp.getgrnam(group).gr_gid
+ except KeyError:
+ raise Fail("Group %s doesn't exist." % group)
+ else:
+ raise Fail("Group %s doesn't exist." % group)
return gid
@@ -232,7 +242,8 @@
shell.checked_call(self.resource.command, logoutput=self.resource.logoutput,
cwd=self.resource.cwd, env=self.resource.environment,
preexec_fn=_preexec_fn(self.resource), user=self.resource.user,
- wait_for_finish=self.resource.wait_for_finish, timeout=self.resource.timeout)
+ wait_for_finish=self.resource.wait_for_finish, timeout=self.resource.timeout,
+ pid_file=self.resource.pid_file)
break
except Fail as ex:
if i == self.resource.tries-1: # last try
diff --git a/slider-agent/src/main/python/resource_management/core/providers/windows/__init__.py b/slider-agent/src/main/python/resource_management/core/providers/windows/__init__.py
new file mode 100644
index 0000000..49fddbd
--- /dev/null
+++ b/slider-agent/src/main/python/resource_management/core/providers/windows/__init__.py
@@ -0,0 +1,20 @@
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Slider Agent
+
+"""
\ No newline at end of file
diff --git a/slider-agent/src/main/python/resource_management/core/providers/windows/service.py b/slider-agent/src/main/python/resource_management/core/providers/windows/service.py
new file mode 100644
index 0000000..4e73a2d
--- /dev/null
+++ b/slider-agent/src/main/python/resource_management/core/providers/windows/service.py
@@ -0,0 +1,65 @@
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Slider Agent
+
+"""
+from resource_management.core.providers import Provider
+from resource_management.core.base import Fail
+import win32service
+import time
+
+
+_schSCManager = win32service.OpenSCManager(None, None, win32service.SC_MANAGER_ALL_ACCESS)
+
+
+class ServiceProvider(Provider):
+ def action_start(self):
+ self._service_handle = self._service_handle if hasattr(self, "_service_handle") else \
+ win32service.OpenService(_schSCManager, self.resource.service_name, win32service.SERVICE_ALL_ACCESS)
+ if not self.status():
+ win32service.StartService(self._service_handle, None)
+ self.wait_status(win32service.SERVICE_RUNNING)
+
+ def action_stop(self):
+ self._service_handle = self._service_handle if hasattr(self, "_service_handle") else \
+ win32service.OpenService(_schSCManager, self.resource.service_name, win32service.SERVICE_ALL_ACCESS)
+ if self.status():
+ win32service.ControlService(self._service_handle, win32service.SERVICE_CONTROL_STOP)
+ self.wait_status(win32service.SERVICE_STOPPED)
+
+ def action_restart(self):
+ self._service_handle = win32service.OpenService(_schSCManager, self.resource.service_name,
+ win32service.SERVICE_ALL_ACCESS)
+ self.action_stop()
+ self.action_start()
+
+ def action_reload(self):
+ raise Fail("Reload for Service resource not supported on windows")
+
+ def status(self):
+ if win32service.QueryServiceStatusEx(self._service_handle)["CurrentState"] == win32service.SERVICE_RUNNING:
+ return True
+ return False
+
+ def get_current_status(self):
+ return win32service.QueryServiceStatusEx(self._service_handle)["CurrentState"]
+
+ def wait_status(self, status, timeout=5):
+ begin = time.time()
+ while self.get_current_status() != status and (timeout == 0 or time.time() - begin < timeout):
+ time.sleep(1)
\ No newline at end of file
diff --git a/slider-agent/src/main/python/resource_management/core/providers/windows/system.py b/slider-agent/src/main/python/resource_management/core/providers/windows/system.py
new file mode 100644
index 0000000..f0d4825
--- /dev/null
+++ b/slider-agent/src/main/python/resource_management/core/providers/windows/system.py
@@ -0,0 +1,195 @@
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Slider Agent
+
+"""
+
+from resource_management.core.providers import Provider
+from resource_management.core.logger import Logger
+from resource_management.core.base import Fail
+from resource_management.core import ExecuteTimeoutException
+from multiprocessing import Queue
+import time
+import os
+import subprocess
+import shutil
+import threading
+
+def _call_command(command, logoutput=False, cwd=None, env=None, wait_for_finish=True, timeout=None, pid_file_name=None):
+ # TODO implement logoutput
+ Logger.info("Executing %s" % (command))
+ proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+ cwd=cwd, env=env, shell=False)
+ if not wait_for_finish:
+ if pid_file_name:
+ pidfile = open(pid_file_name, 'w')
+ pidfile.write(str(proc.pid))
+ pidfile.close()
+ return None, None
+
+ if timeout:
+ q = Queue()
+ t = threading.Timer( timeout, on_timeout, [proc, q] )
+ t.start()
+
+ out = proc.communicate()[0].strip()
+ code = proc.returncode
+ if logoutput and out:
+ Logger.info(out)
+ return code, out
+
+# see msdn Icacls doc for rights
+def _set_file_acl(file, user, rights):
+ acls_modify_cmd = "icacls {0} /grant {1}:{2}".format(file, user, rights)
+ acls_remove_cmd = "icacls {0} /remove {1}".format(file, user)
+ code, out = _call_command(acls_remove_cmd)
+ if code != 0:
+ raise Fail("Can not remove rights for path {0} and user {1}".format(file, user))
+ code, out = _call_command(acls_modify_cmd)
+ if code != 0:
+ raise Fail("Can not set rights {0} for path {1} and user {2}".format(rights, file, user))
+ else:
+ return
+
+class FileProvider(Provider):
+ def action_create(self):
+ path = self.resource.path
+
+ if os.path.isdir(path):
+ raise Fail("Applying %s failed, directory with name %s exists" % (self.resource, path))
+
+ dirname = os.path.dirname(path)
+ if not os.path.isdir(dirname):
+ raise Fail("Applying %s failed, parent directory %s doesn't exist" % (self.resource, dirname))
+
+ write = False
+ content = self._get_content()
+ if not os.path.exists(path):
+ write = True
+ reason = "it doesn't exist"
+ elif self.resource.replace:
+ if content is not None:
+ with open(path, "rb") as fp:
+ old_content = fp.read()
+ if content != old_content:
+ write = True
+ reason = "contents don't match"
+ if self.resource.backup:
+ self.resource.env.backup_file(path)
+
+ if write:
+ Logger.info("Writing %s because %s" % (self.resource, reason))
+ with open(path, "wb") as fp:
+ if content:
+ fp.write(content)
+
+ if self.resource.owner and self.resource.mode:
+ _set_file_acl(self.resource.path, self.resource.owner, self.resource.mode)
+
+ def action_delete(self):
+ path = self.resource.path
+
+ if os.path.isdir(path):
+ raise Fail("Applying %s failed, %s is directory not file!" % (self.resource, path))
+
+ if os.path.exists(path):
+ Logger.info("Deleting %s" % self.resource)
+ os.unlink(path)
+
+ def _get_content(self):
+ content = self.resource.content
+ if content is None:
+ return None
+ elif isinstance(content, basestring):
+ return content
+ elif hasattr(content, "__call__"):
+ return content()
+ raise Fail("Unknown source type for %s: %r" % (self, content))
+
+class ExecuteProvider(Provider):
+ def action_run(self):
+ if self.resource.creates:
+ if os.path.exists(self.resource.creates):
+ return
+
+ Logger.debug("Executing %s" % self.resource)
+
+ if self.resource.path != []:
+ if not self.resource.environment:
+ self.resource.environment = {}
+
+ self.resource.environment['PATH'] = os.pathsep.join(self.resource.path)
+
+ for i in range(0, self.resource.tries):
+ try:
+ _call_command(self.resource.command, logoutput=self.resource.logoutput,
+ cwd=self.resource.cwd, env=self.resource.environment,
+ wait_for_finish=self.resource.wait_for_finish, timeout=self.resource.timeout,
+ pid_file_name=self.resource.pid_file)
+ break
+ except Fail as ex:
+ if i == self.resource.tries - 1: # last try
+ raise ex
+ else:
+ Logger.info("Retrying after %d seconds. Reason: %s" % (self.resource.try_sleep, str(ex)))
+ time.sleep(self.resource.try_sleep)
+ except ExecuteTimeoutException:
+ err_msg = ("Execution of '%s' was killed due timeout after %d seconds") % (
+ self.resource.command, self.resource.timeout)
+
+ if self.resource.on_timeout:
+ Logger.info("Executing '%s'. Reason: %s" % (self.resource.on_timeout, err_msg))
+ _call_command(self.resource.on_timeout)
+ else:
+ raise Fail(err_msg)
+
+
+class DirectoryProvider(Provider):
+ def action_create(self):
+ path = DirectoryProvider._trim_uri(self.resource.path)
+ if not os.path.exists(path):
+ Logger.info("Creating directory %s" % self.resource)
+ if self.resource.recursive:
+ os.makedirs(path)
+ else:
+ dirname = os.path.dirname(path)
+ if not os.path.isdir(dirname):
+ raise Fail("Applying %s failed, parent directory %s doesn't exist" % (self.resource, dirname))
+
+ os.mkdir(path)
+
+ if not os.path.isdir(path):
+ raise Fail("Applying %s failed, file %s already exists" % (self.resource, path))
+
+ if self.resource.owner and self.resource.mode:
+ _set_file_acl(path, self.resource.owner, self.resource.mode)
+
+ def action_delete(self):
+ path = self.resource.path
+ if os.path.exists(path):
+ if not os.path.isdir(path):
+ raise Fail("Applying %s failed, %s is not a directory" % (self.resource, path))
+
+ Logger.info("Removing directory %s and all its content" % self.resource)
+ shutil.rmtree(path)
+
+ @staticmethod
+ def _trim_uri(file_uri):
+ if file_uri.startswith("file:///"):
+ return file_uri[8:]
+ return file_uri
diff --git a/slider-agent/src/main/python/resource_management/core/resources/system.py b/slider-agent/src/main/python/resource_management/core/resources/system.py
index 2c832a4..a63d993 100644
--- a/slider-agent/src/main/python/resource_management/core/resources/system.py
+++ b/slider-agent/src/main/python/resource_management/core/resources/system.py
@@ -101,6 +101,10 @@
- try_sleep
"""
wait_for_finish = BooleanArgument(default=True)
+ """
+ if wait_for_finish is True then optionally the caller can ask for the pid to be written
+ """
+ pid_file = ResourceArgument()
class ExecuteScript(Resource):
diff --git a/slider-agent/src/main/python/resource_management/core/shell.py b/slider-agent/src/main/python/resource_management/core/shell.py
index 92312d5..fb2c946 100644
--- a/slider-agent/src/main/python/resource_management/core/shell.py
+++ b/slider-agent/src/main/python/resource_management/core/shell.py
@@ -31,15 +31,15 @@
from resource_management.core.logger import Logger
def checked_call(command, logoutput=False,
- cwd=None, env=None, preexec_fn=None, user=None, wait_for_finish=True, timeout=None):
- return _call(command, logoutput, True, cwd, env, preexec_fn, user, wait_for_finish, timeout)
+ cwd=None, env=None, preexec_fn=None, user=None, wait_for_finish=True, timeout=None, pid_file=None):
+ return _call(command, logoutput, True, cwd, env, preexec_fn, user, wait_for_finish, timeout, pid_file)
def call(command, logoutput=False,
- cwd=None, env=None, preexec_fn=None, user=None, wait_for_finish=True, timeout=None):
- return _call(command, logoutput, False, cwd, env, preexec_fn, user, wait_for_finish, timeout)
+ cwd=None, env=None, preexec_fn=None, user=None, wait_for_finish=True, timeout=None, pid_file=None):
+ return _call(command, logoutput, False, cwd, env, preexec_fn, user, wait_for_finish, timeout, pid_file)
def _call(command, logoutput=False, throw_on_failure=True,
- cwd=None, env=None, preexec_fn=None, user=None, wait_for_finish=True, timeout=None):
+ cwd=None, env=None, preexec_fn=None, user=None, wait_for_finish=True, timeout=None, pid_file_name=None):
"""
Execute shell command
@@ -67,6 +67,10 @@
preexec_fn=preexec_fn)
if not wait_for_finish:
+ if pid_file_name:
+ pidfile = open(pid_file_name, 'w')
+ pidfile.write(str(proc.pid))
+ pidfile.close()
return None, None
if timeout:
diff --git a/slider-agent/src/main/python/resource_management/libraries/functions/__init__.py b/slider-agent/src/main/python/resource_management/libraries/functions/__init__.py
index ad30707..cc0610d 100644
--- a/slider-agent/src/main/python/resource_management/libraries/functions/__init__.py
+++ b/slider-agent/src/main/python/resource_management/libraries/functions/__init__.py
@@ -28,4 +28,4 @@
from resource_management.libraries.functions.is_empty import *
from resource_management.libraries.functions.substitute_vars import *
from resource_management.libraries.functions.os_check import *
-from resource_management.libraries.functions.get_port_from_url import *
\ No newline at end of file
+from resource_management.libraries.functions.get_port_from_url import *
diff --git a/slider-agent/src/main/python/resource_management/libraries/functions/os_check.py b/slider-agent/src/main/python/resource_management/libraries/functions/os_check.py
index abfceb8..8c11d93 100644
--- a/slider-agent/src/main/python/resource_management/libraries/functions/os_check.py
+++ b/slider-agent/src/main/python/resource_management/libraries/functions/os_check.py
@@ -31,41 +31,65 @@
PYTHON_VER = sys.version_info[0] * 10 + sys.version_info[1]
if PYTHON_VER < 26:
- linux_distribution = platform.dist()
+ (distname, version, id) = platform.dist()
else:
- linux_distribution = platform.linux_distribution()
+ (distname, version, id) = platform.linux_distribution()
- return linux_distribution
+ return (platform.system(), os.name, distname, version, id)
+def windows_distribution():
+ # Only support Windows Server 64 bit
+ (win_release, win_version, win_csd, win_ptype) = platform.win32_ver()
+ #if win_version
+ return (platform.system(), os.name, win_release, win_version, win_ptype)
class OS_CONST_TYPE(type):
+ # os platforms
+ LINUX_OS = 'linux'
+ WINDOWS_OS = 'windows'
+
# os families
REDHAT_FAMILY = 'redhat'
DEBIAN_FAMILY = 'debian'
SUSE_FAMILY = 'suse'
+ WINSRV_FAMILY = 'winsrv'
# Declare here os type mapping
OS_FAMILY_COLLECTION = [
- {'name': REDHAT_FAMILY,
- 'os_list':
- ['redhat', 'fedora', 'centos', 'oraclelinux',
- 'ascendos', 'amazon', 'xenserver', 'oel', 'ovs',
- 'cloudlinux', 'slc', 'scientific', 'psbm',
- 'centos linux']
- },
- {'name': DEBIAN_FAMILY,
- 'os_list': ['ubuntu', 'debian']
- },
- {'name': SUSE_FAMILY,
- 'os_list': ['sles', 'sled', 'opensuse', 'suse']
- }
- ]
+ {'name': REDHAT_FAMILY,
+ 'os_list':
+ ['redhat', 'fedora', 'centos', 'oraclelinux',
+ 'ascendos', 'amazon', 'xenserver', 'oel', 'ovs',
+ 'cloudlinux', 'slc', 'scientific', 'psbm',
+ 'centos linux']
+ },
+ {'name': DEBIAN_FAMILY,
+ 'os_list': ['ubuntu', 'debian']
+ },
+ {'name': SUSE_FAMILY,
+ 'os_list': ['sles', 'sled', 'opensuse', 'suse']
+ }
+ ]
+ WIN_OS_FAMILY_COLLECTION = [
+ {'name': WINSRV_FAMILY,
+ 'os_list':
+ ['2008Server', '2012Server']
+ },
+ ]
# Would be generated from Family collection definition
OS_COLLECTION = []
def __init__(cls, name, bases, dct):
- for item in cls.OS_FAMILY_COLLECTION:
- cls.OS_COLLECTION += item['os_list']
+ if platform.system() == 'Windows':
+ for item in cls.WIN_OS_FAMILY_COLLECTION:
+ cls.OS_COLLECTION += item['os_list']
+ else:
+ if platform.system() == 'Mac':
+ raise Exception("MacOS not supported. Exiting...")
+ else:
+ dist = linux_distribution()
+ for item in cls.OS_FAMILY_COLLECTION:
+ cls.OS_COLLECTION += item['os_list']
def __getattr__(cls, name):
"""
@@ -81,17 +105,45 @@
else:
raise Exception("Unknown class property '%s'" % name)
+def get_os_distribution():
+ if platform.system() == 'Windows':
+ dist = windows_distribution()
+ else:
+ if platform.system() == 'Darwin':
+ dist = ("Darwin", "TestOnly", "1.1.1", "1.1.1", "1.1")
+ else:
+ # Linux
+ # Read content from /etc/*-release file
+ # Full release name
+ dist = linux_distribution()
+ return dist
class OSConst:
__metaclass__ = OS_CONST_TYPE
class OSCheck:
+ _dist = get_os_distribution()
+
+ @staticmethod
+ def get_os_os():
+ """
+ Return values:
+ windows, linux
+
+ In case cannot detect - exit.
+ """
+ # Read content from /etc/*-release file
+ # Full release name
+ os_os = OSCheck._dist[0].lower()
+
+ return os_os
@staticmethod
def get_os_type():
"""
Return values:
+ 2008server, 2012server,
redhat, fedora, centos, oraclelinux, ascendos,
amazon, xenserver, oel, ovs, cloudlinux, slc, scientific, psbm,
ubuntu, debian, sles, sled, opensuse, suse ... and others
@@ -100,15 +152,14 @@
"""
# Read content from /etc/*-release file
# Full release name
- dist = linux_distribution()
- operatingSystem = dist[0].lower()
+ operatingSystem = OSCheck._dist[2].lower()
# special cases
if os.path.exists('/etc/oracle-release'):
return 'oraclelinux'
elif operatingSystem.startswith('suse linux enterprise server'):
return 'sles'
- elif operatingSystem.startswith('red hat enterprise linux server'):
+ elif operatingSystem.startswith('red hat enterprise linux'):
return 'redhat'
if operatingSystem != '':
@@ -124,11 +175,14 @@
In case cannot detect raises exception( from self.get_operating_system_type() ).
"""
- os_family = OSCheck.get_os_type()
- for os_family_item in OSConst.OS_FAMILY_COLLECTION:
- if os_family in os_family_item['os_list']:
- os_family = os_family_item['name']
- break
+ if(OSCheck._dist[0] == 'Windows'):
+ os_family = OSConst.WIN_OS_FAMILY_COLLECTION[0]['name']
+ else:
+ os_family = OSCheck.get_os_type()
+ for os_family_item in OSConst.OS_FAMILY_COLLECTION:
+ if os_family in os_family_item['os_list']:
+ os_family = os_family_item['name']
+ break
return os_family.lower()
@@ -139,15 +193,12 @@
In case cannot detect raises exception.
"""
- # Read content from /etc/*-release file
- # Full release name
- dist = linux_distribution()
- dist = dist[1]
+ dist = OSCheck._dist[3]
if dist:
return dist
else:
- raise Exception("Cannot detect os version. Exiting...")
+ raise Exception("Cannot detect os version from " + repr(OSCheck._dist) + " Exiting...")
@staticmethod
def get_os_major_version():
@@ -165,8 +216,7 @@
In case cannot detect raises exception.
"""
- dist = linux_distribution()
- dist = dist[2].lower()
+ dist = OSCheck._dist[4].lower()
if dist:
return dist
@@ -217,6 +267,54 @@
pass
return False
+ @staticmethod
+ def is_windows_family():
+ """
+ Return true if it is so or false if not
+
+ This is safe check for windows family, doesn't generate exception
+ """
+ try:
+ if OSCheck.get_os_family() == OSConst.WINSRV_FAMILY:
+ return True
+ except Exception:
+ pass
+ return False
+
+ @staticmethod
+ def is_linux_os():
+ """
+ Return true if it is so or false if not
+
+ This is safe check for linux os, doesn't generate exception
+ """
+ try:
+ if OSCheck.get_os_os() == OSConst.LINUX_OS:
+ return True
+ except Exception:
+ pass
+ return False
+
+ @staticmethod
+ def is_windows_os():
+ """
+ Return true if it is so or false if not
+
+ This is safe check for windows os, doesn't generate exception
+ """
+ try:
+ if OSCheck.get_os_os() == OSConst.WINDOWS_OS:
+ return True
+ except Exception:
+ pass
+ return False
+
+
+# OS info
+OS_VERSION = OSCheck().get_os_major_version()
+OS_TYPE = OSCheck.get_os_type()
+OS_FAMILY = OSCheck.get_os_family()
+OS_OS = OSCheck.get_os_os()
if __name__ == "__main__":
main()
diff --git a/slider-agent/src/main/python/resource_management/libraries/providers/__init__.py b/slider-agent/src/main/python/resource_management/libraries/providers/__init__.py
index 973958b..1dfeef7 100644
--- a/slider-agent/src/main/python/resource_management/libraries/providers/__init__.py
+++ b/slider-agent/src/main/python/resource_management/libraries/providers/__init__.py
@@ -30,6 +30,8 @@
debian=dict(
Repository="resource_management.libraries.providers.repository.DebianRepositoryProvider",
),
+ winsrv=dict(
+ ),
default=dict(
ExecuteHadoop="resource_management.libraries.providers.execute_hadoop.ExecuteHadoopProvider",
TemplateConfig="resource_management.libraries.providers.template_config.TemplateConfigProvider",
diff --git a/slider-agent/src/main/python/resource_management/libraries/providers/monitor_webserver.py b/slider-agent/src/main/python/resource_management/libraries/providers/monitor_webserver.py
index 5817879..7750d25 100644
--- a/slider-agent/src/main/python/resource_management/libraries/providers/monitor_webserver.py
+++ b/slider-agent/src/main/python/resource_management/libraries/providers/monitor_webserver.py
@@ -42,7 +42,7 @@
def get_serivice_params(self):
self.system = System.get_instance()
- if self.system.os_family == "suse":
+ if self.system.os_family in ["suse","debian"]:
self.service_name = "apache2"
self.httpd_conf_dir = '/etc/apache2'
else:
diff --git a/slider-agent/src/main/python/resource_management/libraries/script/script.py b/slider-agent/src/main/python/resource_management/libraries/script/script.py
index 624d65e..00b80b4 100644
--- a/slider-agent/src/main/python/resource_management/libraries/script/script.py
+++ b/slider-agent/src/main/python/resource_management/libraries/script/script.py
@@ -24,6 +24,7 @@
import sys
import json
import logging
+import shutil
from resource_management.core.environment import Environment
from resource_management.core.exceptions import Fail, ClientComponentHasNoStatus, ComponentIsNotRunning
@@ -32,6 +33,7 @@
from resource_management.core.resources import Directory
from resource_management.libraries.script.config_dictionary import ConfigDictionary
from resource_management.libraries.script.repo_installer import RepoInstaller
+from resource_management.core.logger import Logger
USAGE = """Usage: {0} <COMMAND> <JSON_CONFIG> <BASEDIR> <STROUTPUT> <LOGGING_LEVEL>
@@ -178,12 +180,23 @@
Directory(install_location, action = "delete")
Directory(install_location)
Tarball(tarball, location=install_location)
+ elif type.lower() == "folder":
+ if name.startswith(os.path.sep):
+ src = name
+ else:
+ basedir = env.config.basedir
+ src = os.path.join(basedir, name)
+ dest = config['configurations']['global']['app_install_dir']
+ Directory(dest, action = "delete")
+ Logger.info("Copying from " + src + " to " + dest)
+ shutil.copytree(src, dest)
else:
if not repo_installed:
RepoInstaller.install_repos(config)
repo_installed = True
Package(name)
- except KeyError:
+ except KeyError, e:
+ Logger.info("Error installing packages. " + repr(e))
pass # No reason to worry
#RepoInstaller.remove_repos(config)
diff --git a/slider-agent/src/test/python/agent/TestActionQueue.py b/slider-agent/src/test/python/agent/TestActionQueue.py
index b3a840c..8071ee8 100644
--- a/slider-agent/src/test/python/agent/TestActionQueue.py
+++ b/slider-agent/src/test/python/agent/TestActionQueue.py
@@ -209,7 +209,7 @@
@patch("traceback.print_exc")
@patch.object(ActionQueue, "execute_command")
@patch.object(ActionQueue, "execute_status_command")
- def test_process_command(self, execute_status_command_mock,
+ def test_process_command2(self, execute_status_command_mock,
execute_command_mock, print_exc_mock):
dummy_controller = MagicMock()
actionQueue = ActionQueue(AgentConfig("", ""), dummy_controller)
@@ -272,6 +272,7 @@
def test_execute_command(self, status_update_callback_mock, open_mock, json_load_mock,
resolve_script_path_mock):
+ self.assertEqual.__self__.maxDiff = None
tempdir = tempfile.gettempdir()
config = MagicMock()
config.get.return_value = "something"
@@ -342,7 +343,8 @@
'role': u'HBASE_MASTER',
'actionId': '1-1',
'taskId': 3,
- 'exitcode': 777}
+ 'exitcode': 777,
+ 'reportResult': True}
self.assertEqual(report['reports'][0], expected)
# Continue command execution
unfreeze_flag.set()
@@ -365,7 +367,8 @@
'structuredOut': '',
'exitcode': 0,
'allocatedPorts': {},
- 'folders': {'AGENT_LOG_ROOT': tempdir, 'AGENT_WORK_ROOT': tempdir}}
+ 'folders': {'AGENT_LOG_ROOT': tempdir, 'AGENT_WORK_ROOT': tempdir},
+ 'reportResult': True}
self.assertEqual(len(report['reports']), 1)
self.assertEqual(report['reports'][0], expected)
self.assertTrue(os.path.isfile(configname))
@@ -403,7 +406,8 @@
'actionId': '1-1',
'taskId': 3,
'structuredOut': '',
- 'exitcode': 13}
+ 'exitcode': 13,
+ 'reportResult': True}
self.assertEqual(len(report['reports']), 1)
self.assertEqual(report['reports'][0], expected)
diff --git a/slider-agent/src/test/python/agent/TestCommandStatusDict.py b/slider-agent/src/test/python/agent/TestCommandStatusDict.py
new file mode 100644
index 0000000..ee91de6
--- /dev/null
+++ b/slider-agent/src/test/python/agent/TestCommandStatusDict.py
@@ -0,0 +1,94 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+'''
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+'''
+
+import StringIO
+import ssl
+import unittest, threading
+import sys
+from CommandStatusDict import CommandStatusDict
+from mock.mock import patch, MagicMock, call, Mock
+import logging
+from threading import Event
+
+class TestCommandStatusDict(unittest.TestCase):
+
+ logger = logging.getLogger()
+
+ auto_hbase_install_command = {
+ 'commandType': 'EXECUTION_COMMAND',
+ 'role': u'HBASE',
+ 'roleCommand': u'INSTALL',
+ 'commandId': '1-1',
+ 'taskId': 7,
+ "componentName": "HBASE_MASTER",
+ 'clusterName': u'cc',
+ 'serviceName': u'HDFS',
+ 'auto_generated': True
+ }
+
+ @patch("__builtin__.open")
+ def test_generate_progress_report(self, open_mock):
+ csd = CommandStatusDict(None)
+ report = {}
+ report['tmpout'] = None
+ report['tmperr'] = None
+ report['structuredOut'] = None
+
+ # Make file read calls visible
+ def open_side_effect(file, mode):
+ if mode == 'r':
+ file_mock = MagicMock()
+ file_mock.read.return_value = "Read from " + str(file)
+ return file_mock
+ else:
+ return self.original_open(file, mode)
+
+ open_mock.side_effect = open_side_effect
+
+ inprogress = csd.generate_in_progress_report(self.auto_hbase_install_command, report)
+ expected = {
+ 'status': 'IN_PROGRESS',
+ 'stderr': 'Read from None',
+ 'stdout': 'Read from None',
+ 'clusterName': u'cc',
+ 'structuredOut': '{}',
+ 'reportResult': False,
+ 'roleCommand': u'INSTALL',
+ 'serviceName': u'HDFS',
+ 'role': u'HBASE',
+ 'actionId': '1-1',
+ 'taskId': 7,
+ 'exitcode': 777}
+ self.assertEqual(inprogress, expected)
+
+ self.auto_hbase_install_command['auto_generated'] = False
+ inprogress = csd.generate_in_progress_report(self.auto_hbase_install_command, report)
+ expected['reportResult'] = True
+ self.assertEqual(inprogress, expected)
+ pass
+
+if __name__ == "__main__":
+ logging.basicConfig(format='%(asctime)s %(message)s',level=logging.DEBUG)
+ unittest.main()
+
+
+
+
diff --git a/slider-agent/src/test/python/agent/TestController.py b/slider-agent/src/test/python/agent/TestController.py
index 939e63f..401d69a 100644
--- a/slider-agent/src/test/python/agent/TestController.py
+++ b/slider-agent/src/test/python/agent/TestController.py
@@ -42,7 +42,7 @@
@patch.object(hostname, "hostname")
def setUp(self, hostname_method, NetUtil_mock, lockMock, threadMock):
- Controller.logger = MagicMock()
+ #Controller.logger = MagicMock()
lockMock.return_value = MagicMock()
NetUtil_mock.return_value = MagicMock()
hostname_method.return_value = "test_hostname"
@@ -55,6 +55,7 @@
self.controller = Controller.Controller(config)
self.controller.netutil.MINIMUM_INTERVAL_BETWEEN_HEARTBEATS = 0.1
self.controller.netutil.HEARTBEAT_NOT_IDDLE_INTERVAL_SEC = 0.1
+ self.controller.actionQueue = ActionQueue.ActionQueue(config, self.controller)
@patch("json.dumps")
@@ -152,9 +153,9 @@
@patch("urllib2.build_opener")
@patch("urllib2.install_opener")
- @patch.object(ActionQueue.ActionQueue, "run")
+ @patch.object(ActionQueue.ActionQueue, "start")
def test_repeatRegistration(self,
- run_mock, installMock, buildMock):
+ start_mock, installMock, buildMock):
registerAndHeartbeat = MagicMock(name="registerAndHeartbeat")
@@ -162,6 +163,7 @@
self.controller.run()
self.assertTrue(installMock.called)
self.assertTrue(buildMock.called)
+ self.assertTrue(start_mock.called)
self.controller.registerAndHeartbeat.assert_called_once_with()
calls = []
@@ -288,7 +290,7 @@
self.controller.sendRequest = sendRequest
self.controller.responseId = 1
- response = {"responseId":"2", "restartAgent":"false"}
+ response = {"responseId":"2", "restartAgent": False}
loadsMock.return_value = response
def one_heartbeat(*args, **kwargs):
@@ -588,9 +590,139 @@
self.controller.config = original_value
pass
+ def test_create_start_command(self):
+ stored_command = {
+ 'commandType': 'EXECUTION_COMMAND',
+ 'role': u'HBASE_MASTER',
+ "componentName": "HBASE_MASTER",
+ 'roleCommand': u'INSTALL',
+ 'commandId': '1-1',
+ 'taskId': 3,
+ 'clusterName': u'cc',
+ 'serviceName': u'HBASE',
+ 'configurations': {'global': {}},
+ 'configurationTags': {'global': {'tag': 'v1'}},
+ 'auto_generated': False,
+ 'roleParams': {'auto_restart':'false'},
+ 'commandParams': {'script_type': 'PYTHON',
+ 'script': 'scripts/abc.py',
+ 'command_timeout': '600'}
+ }
+
+ expected = {
+ 'commandType': 'EXECUTION_COMMAND',
+ 'role': u'HBASE_MASTER',
+ "componentName": "HBASE_MASTER",
+ 'roleCommand': u'INSTALL',
+ 'commandId': '4-1',
+ 'taskId': 4,
+ 'clusterName': u'cc',
+ 'serviceName': u'HBASE',
+ 'configurations': {'global': {}},
+ 'configurationTags': {'global': {'tag': 'v1'}},
+ 'auto_generated': False,
+ 'roleParams': {'auto_restart':'false'},
+ 'commandParams': {'script_type': 'PYTHON',
+ 'script': 'scripts/abc.py',
+ 'command_timeout': '600'},
+ 'auto_generated': True
+ }
+
+ modified_command = self.controller.create_start_command(stored_command)
+ self.assertEqual.__self__.maxDiff = None
+ self.assertEqual(modified_command, expected)
+
+ @patch.object(Controller.Controller, "createStatusCommand")
+ @patch.object(threading._Event, "wait")
+ @patch("time.sleep")
+ @patch("json.loads")
+ @patch("json.dumps")
+ def test_auto_start(self, dumpsMock, loadsMock, timeMock, waitMock, mock_createStatusCommand):
+ original_value = self.controller.config
+ self.controller.config = AgentConfig("", "")
+ out = StringIO.StringIO()
+ sys.stdout = out
+
+ heartbeat = MagicMock()
+ self.controller.heartbeat = heartbeat
+
+ dumpsMock.return_value = "data"
+
+ sendRequest = MagicMock(name="sendRequest")
+ self.controller.sendRequest = sendRequest
+
+ self.controller.responseId = 1
+ response1 = {"responseId": "2", "restartAgent": False, "restartEnabled": True}
+ response2 = {"responseId": "2", "restartAgent": False, "restartEnabled": False}
+ loadsMock.side_effect = [response1, response2, response1]
+
+ def one_heartbeat(*args, **kwargs):
+ self.controller.DEBUG_STOP_HEARTBEATING = True
+ return "data"
+
+ sendRequest.side_effect = one_heartbeat
+
+ actionQueue = MagicMock()
+ actionQueue.isIdle.return_value = True
+
+ # one successful request, after stop
+ self.controller.actionQueue = actionQueue
+ self.controller.componentActualState = State.FAILED
+ self.controller.componentExpectedState = State.STARTED
+ self.assertTrue(self.controller.componentActualState, State.FAILED)
+ self.controller.actionQueue.customServiceOrchestrator.stored_command = {
+ 'commandType': 'EXECUTION_COMMAND',
+ 'role': u'HBASE',
+ 'roleCommand': u'START',
+ 'commandId': '7-1',
+ 'taskId': 7,
+ "componentName": "HBASE_MASTER",
+ 'clusterName': u'cc',
+ 'serviceName': u'HDFS'
+ }
+ addToQueue = MagicMock(name="addToQueue")
+ self.controller.addToQueue = addToQueue
+
+ self.controller.heartbeatWithServer()
+ self.assertTrue(sendRequest.called)
+
+ self.assertTrue(self.controller.componentActualState, State.STARTING)
+ self.assertTrue(self.controller.componentExpectedState, State.STARTED)
+ self.assertEquals(self.controller.failureCount, 0)
+ self.assertFalse(mock_createStatusCommand.called)
+ addToQueue.assert_has_calls([call([{
+ 'commandType': 'EXECUTION_COMMAND',
+ 'clusterName': u'cc',
+ 'serviceName': u'HDFS',
+ 'role': u'HBASE',
+ 'taskId': 8,
+ 'roleCommand': u'START',
+ 'componentName': 'HBASE_MASTER',
+ 'commandId': '8-1',
+ 'auto_generated': True}])])
+ self.controller.config = original_value
+
+ # restartEnabled = False
+ self.controller.componentActualState = State.FAILED
+ self.controller.heartbeatWithServer()
+
+ self.assertTrue(sendRequest.called)
+ self.assertTrue(self.controller.componentActualState, State.FAILED)
+ self.assertTrue(self.controller.componentExpectedState, State.STARTED)
+
+ # restartEnabled = True
+ self.controller.componentActualState = State.INSTALLED
+ self.controller.componentExpectedState = State.INSTALLED
+ self.controller.heartbeatWithServer()
+
+ self.assertTrue(sendRequest.called)
+ self.assertTrue(self.controller.componentActualState, State.INSTALLED)
+ self.assertTrue(self.controller.componentExpectedState, State.INSTALLED)
+ pass
+
if __name__ == "__main__":
- logging.basicConfig(format='%(asctime)s %(message)s',level=logging.DEBUG)
+ logging.basicConfig(format='%(asctime)s %(message)s', level=logging.DEBUG)
unittest.main()
diff --git a/slider-agent/src/test/python/agent/TestCustomServiceOrchestrator.py b/slider-agent/src/test/python/agent/TestCustomServiceOrchestrator.py
index d2439b1..e545afe 100644
--- a/slider-agent/src/test/python/agent/TestCustomServiceOrchestrator.py
+++ b/slider-agent/src/test/python/agent/TestCustomServiceOrchestrator.py
@@ -34,9 +34,11 @@
from mock.mock import MagicMock, patch
import StringIO
import sys
+from socket import socket
class TestCustomServiceOrchestrator(TestCase):
+
def setUp(self):
# disable stdout
out = StringIO.StringIO()
@@ -178,13 +180,13 @@
pass
- @patch.object(CustomServiceOrchestrator, "allocate_port")
+ @patch.object(CustomServiceOrchestrator, "allocate_ports")
@patch.object(CustomServiceOrchestrator, "resolve_script_path")
@patch.object(PythonExecutor, "run_file")
def test_runCommand_get_port(self,
run_file_mock,
resolve_script_path_mock,
- allocate_port_mock):
+ allocate_ports_mock):
command = {
'role': 'HBASE_REGIONSERVER',
'hostLevelParams': {
@@ -212,7 +214,7 @@
config.getWorkRootPath.return_value = tempdir
config.getLogPath.return_value = tempdir
- allocate_port_mock.return_value = 10233
+ allocate_ports_mock.return_value = str(10233)
resolve_script_path_mock.return_value = "/basedir/scriptpath"
dummy_controller = MagicMock()
@@ -225,11 +227,119 @@
}
ret = orchestrator.runCommand(command, "out.txt", "err.txt")
self.assertEqual(ret['exitcode'], 0)
- self.assertEqual(ret['allocated_ports'], {'a.port': '10233'})
+ self.assertEqual(ret['allocated_ports'], {'a.a.port': '10233'})
self.assertTrue(run_file_mock.called)
self.assertEqual(run_file_mock.call_count, 1)
+ self.assertEqual(orchestrator.allocated_ports, {'a.a.port': '10233'})
+ self.assertEqual(orchestrator.stored_command, {})
+ @patch.object(socket, "close")
+ @patch.object(socket, "connect")
+ def test_allocate_port_def(self, socket_connect_mock, socket_close_mock):
+ e = OSError()
+ socket_connect_mock.side_effect = e
+ tempdir = tempfile.gettempdir()
+ config = MagicMock()
+ config.get.return_value = "something"
+ config.getResolvedPath.return_value = tempdir
+ config.getWorkRootPath.return_value = tempdir
+ config.getLogPath.return_value = tempdir
+
+ dummy_controller = MagicMock()
+ orchestrator = CustomServiceOrchestrator(config, dummy_controller)
+ ret = orchestrator.allocate_port(10)
+ self.assertEqual(ret, 10)
+
+ @patch.object(socket, "getsockname")
+ @patch.object(socket, "bind")
+ @patch.object(socket, "close")
+ @patch.object(socket, "connect")
+ def test_allocate_port_new(self, socket_connect_mock, socket_close_mock,
+ socket_bind_mock, socket_getsockname_mock):
+ tempdir = tempfile.gettempdir()
+ config = MagicMock()
+ config.get.return_value = "something"
+ config.getResolvedPath.return_value = tempdir
+ config.getWorkRootPath.return_value = tempdir
+ config.getLogPath.return_value = tempdir
+
+ dummy_controller = MagicMock()
+ orchestrator = CustomServiceOrchestrator(config, dummy_controller)
+ socket_getsockname_mock.return_value = [100, 101]
+ ret = orchestrator.allocate_port(10)
+ self.assertEqual(ret, 101)
+
+ @patch.object(socket, "getsockname")
+ @patch.object(socket, "bind")
+ def test_allocate_port_no_def(self, socket_bind_mock, socket_getsockname_mock):
+ tempdir = tempfile.gettempdir()
+ config = MagicMock()
+ config.get.return_value = "something"
+ config.getResolvedPath.return_value = tempdir
+ config.getWorkRootPath.return_value = tempdir
+ config.getLogPath.return_value = tempdir
+
+ dummy_controller = MagicMock()
+ orchestrator = CustomServiceOrchestrator(config, dummy_controller)
+ socket_getsockname_mock.return_value = [100, 102]
+ ret = orchestrator.allocate_port()
+ self.assertEqual(ret, 102)
+
+
+ @patch.object(CustomServiceOrchestrator, "is_port_available")
+ @patch.object(CustomServiceOrchestrator, "allocate_port")
+ def test_allocate_port_combinations(self, allocate_port_mock, is_port_available_mock):
+ tempdir = tempfile.gettempdir()
+ config = MagicMock()
+ config.get.return_value = "something"
+ config.getResolvedPath.return_value = tempdir
+ config.getWorkRootPath.return_value = tempdir
+ config.getLogPath.return_value = tempdir
+
+ dummy_controller = MagicMock()
+ orchestrator = CustomServiceOrchestrator(config, dummy_controller)
+
+ is_port_available_mock.return_value = False
+ allocate_port_mock.side_effect = [101, 102, 103, 104, 105, 106]
+ ret = orchestrator.allocate_ports("1000", "${A.ALLOCATED_PORT}")
+ self.assertEqual(ret, "1000")
+ ret = orchestrator.allocate_ports("${A.ALLOCATED_PORT}", "${A.ALLOCATED_PORT}")
+ self.assertEqual(ret, "101")
+ ret = orchestrator.allocate_ports("${A.ALLOCATED_PORT},${A.ALLOCATED_PORT}", "${A.ALLOCATED_PORT}")
+ self.assertEqual(ret, "102,103")
+ ret = orchestrator.allocate_ports("${A.ALLOCATED_PORT}{DEFAULT_0}", "${A.ALLOCATED_PORT}")
+ self.assertEqual(ret, "104")
+ ret = orchestrator.allocate_ports("${A.ALLOCATED_PORT}{DEFAULT_0}{DO_NOT_PROPAGATE}", "${A.ALLOCATED_PORT}")
+ self.assertEqual(ret, "105")
+ ret = orchestrator.allocate_ports("${A.ALLOCATED_PORT}{DO_NOT_PROPAGATE}", "${A.ALLOCATED_PORT}")
+ self.assertEqual(ret, "106")
+
+
+ @patch.object(CustomServiceOrchestrator, "is_port_available")
+ def test_allocate_port_combinations2(self, is_port_available_mock):
+ tempdir = tempfile.gettempdir()
+ config = MagicMock()
+ config.get.return_value = "something"
+ config.getResolvedPath.return_value = tempdir
+ config.getWorkRootPath.return_value = tempdir
+ config.getLogPath.return_value = tempdir
+
+ dummy_controller = MagicMock()
+ orchestrator = CustomServiceOrchestrator(config, dummy_controller)
+
+ is_port_available_mock.return_value = True
+ ret = orchestrator.allocate_ports("${A.ALLOCATED_PORT}{DEFAULT_1005}", "${A.ALLOCATED_PORT}")
+ self.assertEqual(ret, "1005")
+
+ ret = orchestrator.allocate_ports("${A.ALLOCATED_PORT}{DEFAULT_1005}-${A.ALLOCATED_PORT}{DEFAULT_1006}",
+ "${A.ALLOCATED_PORT}")
+ self.assertEqual(ret, "1005-1006")
+
+ ret = orchestrator.allocate_ports("${A.ALLOCATED_PORT}{DEFAULT_1006}{DO_NOT_PROPAGATE}",
+ "${A.ALLOCATED_PORT}")
+ self.assertEqual(ret, "1006")
+
@patch("hostname.public_hostname")
@patch("os.path.isfile")
@patch("os.unlink")
@@ -308,9 +418,10 @@
}
ret = orchestrator.runCommand(command, "out.txt", "err.txt", True, True)
+ self.assertEqual.__self__.maxDiff = None
self.assertEqual(ret['exitcode'], 0)
self.assertTrue(run_file_mock.called)
- self.assertEqual(orchestrator.applied_configs, expected)
+ self.assertEqual(orchestrator.stored_command, command)
ret = orchestrator.requestComponentStatus(command_get)
self.assertEqual(ret['configurations'], expected)
@@ -353,8 +464,8 @@
status = orchestrator.requestComponentStatus(status_command)
self.assertEqual(CustomServiceOrchestrator.DEAD_STATUS, status['exitcode'])
- @patch.object(CustomServiceOrchestrator, "allocate_port")
- def test_finalize_command(self, mock_allocate_port):
+ @patch.object(CustomServiceOrchestrator, "allocate_ports")
+ def test_finalize_command(self, mock_allocate_ports):
dummy_controller = MagicMock()
tempdir = tempfile.gettempdir()
tempWorkDir = tempdir + "W"
@@ -363,7 +474,7 @@
config.getResolvedPath.return_value = tempdir
config.getWorkRootPath.return_value = tempWorkDir
config.getLogPath.return_value = tempdir
- mock_allocate_port.return_value = "10023"
+ mock_allocate_ports.return_value = "10023"
orchestrator = CustomServiceOrchestrator(config, dummy_controller)
command = {}
@@ -377,16 +488,24 @@
command['configurations']['oozie-site'] = {}
command['configurations']['oozie-site']['log_root'] = "${AGENT_LOG_ROOT}"
command['configurations']['oozie-site']['a_port'] = "${HBASE_MASTER.ALLOCATED_PORT}"
+ command['configurations']['oozie-site']['ignore_port1'] = "[${HBASE_RS.ALLOCATED_PORT}]"
+ command['configurations']['oozie-site']['ignore_port2'] = "[${HBASE_RS.ALLOCATED_PORT},${HBASE_REST.ALLOCATED_PORT}{DO_NOT_PROPAGATE}]"
+ command['configurations']['oozie-site']['ignore_port3'] = "[${HBASE_RS.ALLOCATED_PORT}{a}{b}{c},${A.ALLOCATED_PORT}{DO_NOT_PROPAGATE},${A.ALLOCATED_PORT}{DEFAULT_3}{DO_NOT_PROPAGATE}]"
+ command['configurations']['oozie-site']['ignore_port4'] = "${HBASE_RS}{a}{b}{c}"
allocated_ports = {}
orchestrator.finalize_command(command, False, allocated_ports)
self.assertEqual(command['configurations']['hbase-site']['work_root'], tempWorkDir)
self.assertEqual(command['configurations']['oozie-site']['log_root'], tempdir)
self.assertEqual(command['configurations']['oozie-site']['a_port'], "10023")
- self.assertEqual(orchestrator.applied_configs, {})
+ self.assertEqual(command['configurations']['oozie-site']['ignore_port1'], "[0]")
+ self.assertEqual(command['configurations']['oozie-site']['ignore_port2'], "[0,0]")
+ self.assertEqual(command['configurations']['oozie-site']['ignore_port3'], "[0,0,0]")
+ self.assertEqual(command['configurations']['oozie-site']['ignore_port4'], "${HBASE_RS}{a}{b}{c}")
+ self.assertEqual(orchestrator.stored_command, {})
self.assertEqual(len(allocated_ports), 1)
- self.assertTrue('a_port' in allocated_ports)
- self.assertEqual(allocated_ports['a_port'], '10023')
+ self.assertTrue('oozie-site.a_port' in allocated_ports)
+ self.assertEqual(allocated_ports['oozie-site.a_port'], '10023')
command['configurations']['hbase-site']['work_root'] = "${AGENT_WORK_ROOT}"
command['configurations']['hbase-site']['log_root'] = "${AGENT_LOG_ROOT}/log"
@@ -397,8 +516,8 @@
orchestrator.finalize_command(command, True, {})
self.assertEqual(command['configurations']['hbase-site']['log_root'], tempdir + "/log")
self.assertEqual(command['configurations']['hbase-site']['blog_root'], "/b/" + tempdir + "/log")
- self.assertEqual(command['configurations']['oozie-site']['b_port'], "${HBASE_REGIONSERVER.ALLOCATED_PORT}")
- self.assertEqual(orchestrator.applied_configs, command['configurations'])
+ self.assertEqual(command['configurations']['oozie-site']['b_port'], "0")
+ self.assertEqual(orchestrator.stored_command, command)
def test_port_allocation(self):
diff --git a/slider-agent/src/test/python/agent/TestGrep.py b/slider-agent/src/test/python/agent/TestGrep.py
index 75f0093..351befb 100644
--- a/slider-agent/src/test/python/agent/TestGrep.py
+++ b/slider-agent/src/test/python/agent/TestGrep.py
@@ -19,7 +19,7 @@
'''
from unittest import TestCase
-from agent.Grep import Grep
+from Grep import Grep
import socket
import os, sys
import logging
diff --git a/slider-agent/src/test/python/agent/TestHeartbeat.py b/slider-agent/src/test/python/agent/TestHeartbeat.py
index b60c14c..b012218 100644
--- a/slider-agent/src/test/python/agent/TestHeartbeat.py
+++ b/slider-agent/src/test/python/agent/TestHeartbeat.py
@@ -30,7 +30,7 @@
import StringIO
import sys
import logging
-
+from Controller import State
class TestHeartbeat(TestCase):
def setUp(self):
@@ -64,7 +64,7 @@
self.assertEquals(result['nodeStatus']['cause'], "NONE")
self.assertEquals(result['nodeStatus']['status'], "HEALTHY")
# result may or may NOT have an agentEnv structure in it
- self.assertEquals((len(result) is 5) or (len(result) is 6), True)
+ self.assertEquals((len(result) is 6) or (len(result) is 7), True)
self.assertEquals(not heartbeat.reports, True,
"Heartbeat should not contain task in progress")
@@ -85,7 +85,8 @@
'role': u'DATANODE',
'actionId': '1-1',
'taskId': 3,
- 'exitcode': 777},
+ 'exitcode': 777,
+ 'reportResult' : True},
{'status': 'COMPLETED',
'stderr': 'stderr',
@@ -96,7 +97,8 @@
'role': 'role',
'actionId': 17,
'taskId': 'taskId',
- 'exitcode': 0},
+ 'exitcode': 0,
+ 'reportResult' : True},
{'status': 'FAILED',
'stderr': 'stderr',
@@ -107,7 +109,8 @@
'role': u'DATANODE',
'actionId': '1-1',
'taskId': 3,
- 'exitcode': 13},
+ 'exitcode': 13,
+ 'reportResult' : True},
{'status': 'COMPLETED',
'stderr': 'stderr',
@@ -119,8 +122,21 @@
'role': u'DATANODE',
'actionId': '1-1',
'taskId': 3,
- 'exitcode': 0}
+ 'exitcode': 0,
+ 'reportResult' : True},
+ {'status': 'COMPLETED',
+ 'stderr': 'stderr',
+ 'stdout': 'out',
+ 'clusterName': u'cc',
+ 'configurationTags': {'global': {'tag': 'v1'}},
+ 'roleCommand': u'INSTALL',
+ 'serviceName': u'HDFS',
+ 'role': u'DATANODE',
+ 'actionId': '1-1',
+ 'taskId': 3,
+ 'exitcode': 0,
+ 'reportResult' : False}
],
'componentStatus': [
{'status': 'HEALTHY', 'componentName': 'DATANODE', 'reportResult' : True},
@@ -129,6 +145,7 @@
],
}
heartbeat = Heartbeat(actionQueue, config)
+ # State.STARTED results in agentState to be set to 4 (enum order)
hb = heartbeat.build({}, 10)
hb['hostname'] = 'hostname'
hb['timestamp'] = 'timestamp'
@@ -157,8 +174,84 @@
'stderr': 'stderr'}], 'componentStatus': [
{'status': 'HEALTHY', 'componentName': 'DATANODE'},
{'status': 'UNHEALTHY', 'componentName': 'NAMENODE'}]}
+ self.assertEqual.__self__.maxDiff = None
self.assertEquals(hb, expected)
+ @patch.object(ActionQueue, "result")
+ def test_build_result2(self, result_mock):
+ config = AgentConfig("", "")
+ config.set('agent', 'prefix', 'tmp')
+ dummy_controller = MagicMock()
+ actionQueue = ActionQueue(config, dummy_controller)
+ result_mock.return_value = {
+ 'reports': [{'status': 'IN_PROGRESS',
+ 'stderr': 'Read from /tmp/errors-3.txt',
+ 'stdout': 'Read from /tmp/output-3.txt',
+ 'clusterName': u'cc',
+ 'roleCommand': u'INSTALL',
+ 'serviceName': u'HDFS',
+ 'role': u'DATANODE',
+ 'actionId': '1-1',
+ 'taskId': 3,
+ 'exitcode': 777,
+ 'reportResult' : False}
+ ],
+ 'componentStatus': []
+ }
+ heartbeat = Heartbeat(actionQueue, config)
+
+ commandResult = {}
+ hb = heartbeat.build(commandResult, 10)
+ hb['hostname'] = 'hostname'
+ hb['timestamp'] = 'timestamp'
+ hb['fqdn'] = 'fqdn'
+ expected = {'nodeStatus':
+ {'status': 'HEALTHY',
+ 'cause': 'NONE'},
+ 'timestamp': 'timestamp', 'hostname': 'hostname', 'fqdn': 'fqdn',
+ 'responseId': 10, 'reports': []}
+ self.assertEqual.__self__.maxDiff = None
+ self.assertEquals(hb, expected)
+ self.assertEquals(commandResult, {'commandStatus': 'IN_PROGRESS'})
+
+ @patch.object(ActionQueue, "result")
+ def test_build_result3(self, result_mock):
+ config = AgentConfig("", "")
+ config.set('agent', 'prefix', 'tmp')
+ dummy_controller = MagicMock()
+ actionQueue = ActionQueue(config, dummy_controller)
+ result_mock.return_value = {
+ 'reports': [{'status': 'COMPLETED',
+ 'stderr': 'Read from /tmp/errors-3.txt',
+ 'stdout': 'Read from /tmp/output-3.txt',
+ 'clusterName': u'cc',
+ 'roleCommand': u'INSTALL',
+ 'serviceName': u'HDFS',
+ 'role': u'DATANODE',
+ 'actionId': '1-1',
+ 'taskId': 3,
+ 'exitcode': 777,
+ 'reportResult' : False}
+ ],
+ 'componentStatus': []
+ }
+ heartbeat = Heartbeat(actionQueue, config)
+
+ commandResult = {}
+ hb = heartbeat.build(commandResult, 10)
+ hb['hostname'] = 'hostname'
+ hb['timestamp'] = 'timestamp'
+ hb['fqdn'] = 'fqdn'
+ expected = {'nodeStatus':
+ {'status': 'HEALTHY',
+ 'cause': 'NONE'},
+ 'timestamp': 'timestamp', 'hostname': 'hostname', 'fqdn': 'fqdn',
+ 'responseId': 10, 'reports': []}
+ self.assertEqual.__self__.maxDiff = None
+ self.assertEquals(hb, expected)
+ self.assertEquals(commandResult, {'commandStatus': 'COMPLETED'})
+
+
if __name__ == "__main__":
logging.basicConfig(format='%(asctime)s %(message)s', level=logging.DEBUG)
diff --git a/slider-agent/src/test/python/agent/TestMain.py b/slider-agent/src/test/python/agent/TestMain.py
index 9ef1cad..bc68582 100644
--- a/slider-agent/src/test/python/agent/TestMain.py
+++ b/slider-agent/src/test/python/agent/TestMain.py
@@ -20,19 +20,20 @@
import StringIO
import sys
-from agent import NetUtil, security
+import NetUtil, security
from mock.mock import MagicMock, patch, ANY
import unittest
-from agent import ProcessHelper, main
+import ProcessHelper, main
import logging
import signal
-from agent.AgentConfig import AgentConfig
+from AgentConfig import AgentConfig
import ConfigParser
import os
import tempfile
-from agent.Controller import Controller
+from Controller import Controller
from optparse import OptionParser
+logger = logging.getLogger()
class TestMain(unittest.TestCase):
def setUp(self):
@@ -148,8 +149,7 @@
@patch("sys.exit")
@patch("os.path.isfile")
@patch("os.path.isdir")
- @patch("hostname.hostname")
- def test_perform_prestart_checks(self, hostname_mock, isdir_mock, isfile_mock,
+ def test_perform_prestart_checks(self, isdir_mock, isfile_mock,
exit_mock, remove_mock):
main.config = AgentConfig("", "")
@@ -221,7 +221,6 @@
@patch.object(main, "setup_logging")
@patch.object(main, "bind_signal_handlers")
- @patch.object(main, "stop_agent")
@patch.object(main, "update_config_from_file")
@patch.object(main, "perform_prestart_checks")
@patch.object(main, "write_pid")
@@ -231,13 +230,15 @@
@patch.object(Controller, "start")
@patch.object(Controller, "join")
@patch("optparse.OptionParser.parse_args")
- def test_main(self, parse_args_mock, join_mock, start_mock,
+ @patch.object(Controller, "is_alive")
+ def test_main(self, isAlive_mock, parse_args_mock, join_mock, start_mock,
Controller_init_mock, try_to_connect_mock,
update_log_level_mock, write_pid_mock,
perform_prestart_checks_mock,
- update_config_from_file_mock, stop_mock,
+ update_config_from_file_mock,
bind_signal_handlers_mock, setup_logging_mock):
Controller_init_mock.return_value = None
+ isAlive_mock.return_value = False
options = MagicMock()
parse_args_mock.return_value = (options, MagicMock)
@@ -245,25 +246,24 @@
#testing call without command-line arguments
os.environ["AGENT_WORK_ROOT"] = os.path.join(tmpdir, "work")
- os.environ["AGENT_LOG_ROOT"] = os.path.join(tmpdir, "log")
+ os.environ["AGENT_LOG_ROOT"] = ",".join([os.path.join(tmpdir, "log"),os.path.join(tmpdir, "log2")])
main.main()
self.assertTrue(setup_logging_mock.called)
self.assertTrue(bind_signal_handlers_mock.called)
- self.assertTrue(stop_mock.called)
self.assertTrue(update_config_from_file_mock.called)
self.assertTrue(perform_prestart_checks_mock.called)
self.assertTrue(write_pid_mock.called)
self.assertTrue(update_log_level_mock.called)
+ self.assertTrue(options.log_folder == os.path.join(tmpdir, "log"))
try_to_connect_mock.assert_called_once_with(ANY, -1, ANY)
self.assertTrue(start_mock.called)
class AgentOptions:
- def __init__(self, label, host, port, secured_port, verbose, debug):
+ def __init__(self, label, zk_quorum, zk_reg_path, verbose, debug):
self.label = label
- self.host = host
- self.port = port
- self.secured_port = secured_port
+ self.zk_quorum = zk_quorum
+ self.zk_reg_path = zk_reg_path
self.verbose = verbose
self.debug = debug
@@ -281,7 +281,7 @@
@patch.object(Controller, "join")
@patch.object(Controller, "is_alive")
@patch("optparse.OptionParser.parse_args")
- def test_main(self, parse_args_mock, isAlive_mock, join_mock, start_mock,
+ def test_main2(self, parse_args_mock, isAlive_mock, join_mock, start_mock,
Controller_init_mock, AgentConfig_set_mock,
try_to_connect_mock,
update_log_level_mock, write_pid_mock,
@@ -291,18 +291,18 @@
Controller_init_mock.return_value = None
isAlive_mock.return_value = False
parse_args_mock.return_value = (
- TestMain.AgentOptions("agent", "host1", "8080", "8081", True, ""), [])
+ TestMain.AgentOptions("agent", "host1:2181", "/registry/org-apache-slider/cl1", True, ""), [])
tmpdir = tempfile.gettempdir()
#testing call without command-line arguments
os.environ["AGENT_WORK_ROOT"] = os.path.join(tmpdir, "work")
os.environ["AGENT_LOG_ROOT"] = os.path.join(tmpdir, "log")
main.main()
- self.assertTrue(AgentConfig_set_mock.call_count == 4)
- AgentConfig_set_mock.assert_any_call("server", "hostname", "host1")
- AgentConfig_set_mock.assert_any_call("server", "port", "8080")
- AgentConfig_set_mock.assert_any_call("server", "secured_port", "8081")
+ self.assertTrue(AgentConfig_set_mock.call_count == 3)
+ AgentConfig_set_mock.assert_any_call("server", "zk_quorum", "host1:2181")
+ AgentConfig_set_mock.assert_any_call("server", "zk_reg_path", "/registry/org-apache-slider/cl1")
if __name__ == "__main__":
+ logging.basicConfig(format='%(asctime)s %(message)s', level=logging.DEBUG)
unittest.main()
\ No newline at end of file
diff --git a/slider-agent/src/test/python/agent/TestRegistration.py b/slider-agent/src/test/python/agent/TestRegistration.py
index 356e480..f91fe29 100644
--- a/slider-agent/src/test/python/agent/TestRegistration.py
+++ b/slider-agent/src/test/python/agent/TestRegistration.py
@@ -25,8 +25,9 @@
import tempfile
from mock.mock import patch
from mock.mock import MagicMock
-from agent.Register import Register
-from agent.AgentConfig import AgentConfig
+from Register import Register
+from Controller import State
+from AgentConfig import AgentConfig
class TestRegistration(TestCase):
@@ -47,20 +48,23 @@
text_file.write("1.3.0")
register = Register(config)
- data = register.build(1)
+ data = register.build(State.INIT, State.INIT, {}, 1)
#print ("Register: " + pprint.pformat(data))
self.assertEquals(data['hostname'] != "", True, "hostname should not be empty")
self.assertEquals(data['publicHostname'] != "", True, "publicHostname should not be empty")
self.assertEquals(data['responseId'], 1)
self.assertEquals(data['timestamp'] > 1353678475465L, True, "timestamp should not be empty")
self.assertEquals(data['agentVersion'], '1.3.0', "agentVersion should not be empty")
- self.assertEquals(len(data), 5)
+ self.assertEquals(data['actualState'], State.INIT, "actualState should not be empty")
+ self.assertEquals(data['expectedState'], State.INIT, "expectedState should not be empty")
+ self.assertEquals(data['allocatedPorts'], {}, "allocatedPorts should be empty")
+ self.assertEquals(len(data), 8)
self.assertEquals(os.path.join(tmpdir, "app/definition"), config.getResolvedPath("app_pkg_dir"))
self.assertEquals(os.path.join(tmpdir, "app/install"), config.getResolvedPath("app_install_dir"))
- self.assertEquals(os.path.join(ver_dir, "app/log"), config.getResolvedPath("app_log_dir"))
- self.assertEquals(os.path.join(ver_dir, "infra/log"), config.getResolvedPath("log_dir"))
- self.assertEquals(os.path.join(ver_dir, "app/command-log"), config.getResolvedPath("app_task_dir"))
+ self.assertEquals(os.path.join(ver_dir, "."), config.getResolvedPath("app_log_dir"))
+ self.assertEquals(os.path.join(ver_dir, "."), config.getResolvedPath("log_dir"))
+ self.assertEquals(os.path.join(ver_dir, "."), config.getResolvedPath("app_task_dir"))
os.remove(ver_file)
os.removedirs(ver_dir)
diff --git a/slider-agent/src/test/python/agent/TestShell.py b/slider-agent/src/test/python/agent/TestShell.py
index 32a8d11..8caed7b 100644
--- a/slider-agent/src/test/python/agent/TestShell.py
+++ b/slider-agent/src/test/python/agent/TestShell.py
@@ -26,36 +26,47 @@
from agent import shell
from sys import platform as _platform
import subprocess, time
+import sys
+import platform
class TestShell(unittest.TestCase):
+ unsupported_for_test = []
+ def linux_distribution(self):
+ PYTHON_VER = sys.version_info[0] * 10 + sys.version_info[1]
- @patch("os.setuid")
- def test_changeUid(self, os_setUIDMock):
- shell.threadLocal.uid = 9999
- shell.changeUid()
- self.assertTrue(os_setUIDMock.called)
+ if PYTHON_VER < 26:
+ linux_dist = platform.dist()
+ else:
+ linux_dist = platform.linux_distribution()
+
+ return linux_dist
def test_kill_process_with_children(self):
+ dist = self.linux_distribution()
+ operatingSystem = dist[0].lower()
+ if operatingSystem in self.unsupported_for_test:
+ return
+
if _platform == "linux" or _platform == "linux2": # Test is Linux-specific
gracefull_kill_delay_old = shell.gracefull_kill_delay
shell.gracefull_kill_delay = 0.1
- sleep_cmd = "sleep 314159265"
- test_cmd = """ (({0}) | ({0} | {0})) """.format(sleep_cmd)
+ sleep_cmd = "sleep 10"
+ test_cmd = """ (({0}) & ({0} & {0})) """.format(sleep_cmd)
# Starting process tree (multiple process groups)
- test_process = subprocess.Popen(test_cmd, shell=True)
+ test_process = subprocess.Popen(test_cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
time.sleep(0.3) # Delay to allow subprocess to start
# Check if processes are running
- ps_cmd = """ps aux | grep "{0}" | grep -v grep """.format(sleep_cmd)
- ps_process = subprocess.Popen(ps_cmd, stdout=subprocess.PIPE, shell=True)
+ ps_cmd = """ps aux """
+ ps_process = subprocess.Popen(ps_cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
(out, err) = ps_process.communicate()
self.assertTrue(sleep_cmd in out)
# Kill test process
shell.kill_process_with_children(test_process.pid)
test_process.communicate()
# Now test process should not be running
- ps_process = subprocess.Popen(ps_cmd, stdout=subprocess.PIPE, shell=True)
+ ps_process = subprocess.Popen(ps_cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
(out, err) = ps_process.communicate()
self.assertFalse(sleep_cmd in out)
shell.gracefull_kill_delay = gracefull_kill_delay_old
diff --git a/slider-agent/src/test/python/resource_management/TestDirectoryResource.py b/slider-agent/src/test/python/resource_management/TestDirectoryResource.py
index 866486e..d9a262c 100644
--- a/slider-agent/src/test/python/resource_management/TestDirectoryResource.py
+++ b/slider-agent/src/test/python/resource_management/TestDirectoryResource.py
@@ -26,7 +26,7 @@
from resource_management.core.resources import Directory
@patch.object(System, "os_family", new = 'redhat')
-class TestFileResource(TestCase):
+class TestDirectoryResource(TestCase):
@patch.object(os.path, "exists")
@patch.object(os, "makedirs")
diff --git a/slider-agent/src/test/python/resource_management/TestGroupResource.py b/slider-agent/src/test/python/resource_management/TestGroupResource.py.disabled
similarity index 100%
rename from slider-agent/src/test/python/resource_management/TestGroupResource.py
rename to slider-agent/src/test/python/resource_management/TestGroupResource.py.disabled
diff --git a/slider-agent/src/test/python/resource_management/TestUserResource.py b/slider-agent/src/test/python/resource_management/TestUserResource.py.disabled
similarity index 100%
rename from slider-agent/src/test/python/resource_management/TestUserResource.py
rename to slider-agent/src/test/python/resource_management/TestUserResource.py.disabled
diff --git a/slider-agent/src/test/python/unitTests.py b/slider-agent/src/test/python/unitTests.py
index 0d822fd..e3f2d7c 100644
--- a/slider-agent/src/test/python/unitTests.py
+++ b/slider-agent/src/test/python/unitTests.py
@@ -54,7 +54,8 @@
for directory in os.listdir(src_dir):
if os.path.isdir(directory) and not directory in ignoredDirs:
files_list += os.listdir(src_dir + os.sep + directory)
- shuffle(files_list)
+ ## temporarily deleting to add more predictability
+ ## shuffle(files_list)
tests_list = []
logger.info('------------------------TESTS LIST:-------------------------------------')
diff --git a/slider-assembly/pom.xml b/slider-assembly/pom.xml
index 0ec91dd..fab8230 100644
--- a/slider-assembly/pom.xml
+++ b/slider-assembly/pom.xml
@@ -23,7 +23,7 @@
<parent>
<groupId>org.apache.slider</groupId>
<artifactId>slider</artifactId>
- <version>0.40</version>
+ <version>0.50.0-incubating</version>
</parent>
@@ -141,26 +141,12 @@
</configuration>
<executions>
<execution>
- <id>clean</id>
- <goals>
- <goal>enforce</goal>
- </goals>
- <phase>pre-clean</phase>
- </execution>
- <execution>
<id>default</id>
<goals>
<goal>enforce</goal>
</goals>
<phase>validate</phase>
</execution>
- <execution>
- <id>site</id>
- <goals>
- <goal>enforce</goal>
- </goals>
- <phase>pre-site</phase>
- </execution>
</executions>
</plugin>
@@ -308,9 +294,9 @@
<sources>
<source>
<location>${project.build.directory}/agent</location>
- <includes>
- <include>slider-agent.tar.gz</include>
- </includes>
+ <includes>
+ <include>slider-agent.tar.gz</include>
+ </includes>
</source>
</sources>
</mapping>
diff --git a/slider-install/README.md b/slider-assembly/src/main/bash/README.md
similarity index 100%
rename from slider-install/README.md
rename to slider-assembly/src/main/bash/README.md
diff --git a/slider-install/src/main/bash/slider-client.xml b/slider-assembly/src/main/bash/slider-client.xml
similarity index 100%
rename from slider-install/src/main/bash/slider-client.xml
rename to slider-assembly/src/main/bash/slider-client.xml
diff --git a/slider-install/src/main/bash/slider_destroy b/slider-assembly/src/main/bash/slider_destroy
similarity index 100%
rename from slider-install/src/main/bash/slider_destroy
rename to slider-assembly/src/main/bash/slider_destroy
diff --git a/slider-install/src/main/bash/slider_setup b/slider-assembly/src/main/bash/slider_setup
similarity index 100%
rename from slider-install/src/main/bash/slider_setup
rename to slider-assembly/src/main/bash/slider_setup
diff --git a/slider-install/src/main/bash/slider_setup.conf b/slider-assembly/src/main/bash/slider_setup.conf
similarity index 100%
rename from slider-install/src/main/bash/slider_setup.conf
rename to slider-assembly/src/main/bash/slider_setup.conf
diff --git a/slider-core/pom.xml b/slider-core/pom.xml
index 95a7e71..9a16ab3 100644
--- a/slider-core/pom.xml
+++ b/slider-core/pom.xml
@@ -23,7 +23,7 @@
<parent>
<groupId>org.apache.slider</groupId>
<artifactId>slider</artifactId>
- <version>0.40</version>
+ <version>0.50.0-incubating</version>
</parent>
<build>
@@ -62,14 +62,6 @@
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
- <version>${maven-compiler-plugin.version}</version>
- <configuration>
- <compilerId>groovy-eclipse-compiler</compilerId>
- <!-- set verbose to be true if you want lots of uninteresting messages -->
- <!-- <verbose>true</verbose> -->
- <source>${project.java.src.version}</source>
- <target>${project.java.src.version}</target>
- </configuration>
<dependencies>
<dependency>
<groupId>org.codehaus.groovy</groupId>
@@ -317,48 +309,7 @@
<artifactId>hadoop-minicluster</artifactId>
<scope>test</scope>
</dependency>
-<!--
- <dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase-client</artifactId>
- <scope>test</scope>
- </dependency>
-
- <dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase-server</artifactId>
- <scope>test</scope>
- </dependency>
-
--->
-
-<!--
-
- <dependency>
- <groupId>org.apache.accumulo</groupId>
- <artifactId>accumulo-core</artifactId>
- <scope>test</scope>
- </dependency>
-
- <dependency>
- <groupId>org.apache.accumulo</groupId>
- <artifactId>accumulo-minicluster</artifactId>
- <scope>test</scope>
- </dependency>
-
- <dependency>
- <groupId>org.apache.accumulo</groupId>
- <artifactId>accumulo-start</artifactId>
- <scope>test</scope>
- </dependency>
-
- <dependency>
- <groupId>org.apache.accumulo</groupId>
- <artifactId>accumulo-trace</artifactId>
- <scope>test</scope>
- </dependency>
--->
<dependency>
<groupId>junit</groupId>
@@ -406,6 +357,11 @@
<artifactId>commons-logging</artifactId>
</dependency>
+ <dependency>
+ <groupId>com.codahale.metrics</groupId>
+ <artifactId>metrics-core</artifactId>
+ </dependency>
+
<!-- ======================================================== -->
<!-- service registry -->
<!-- ======================================================== -->
@@ -450,6 +406,11 @@
</dependency>
<dependency>
+ <groupId>javax.xml.bind</groupId>
+ <artifactId>jaxb-api</artifactId>
+ </dependency>
+
+ <dependency>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-client</artifactId>
</dependency>
@@ -510,8 +471,17 @@
<dependency>
<groupId>org.mortbay.jetty</groupId>
+ <artifactId>jetty</artifactId>
+ </dependency>
+
+ <dependency>
+ <groupId>org.mortbay.jetty</groupId>
+ <artifactId>jetty-util</artifactId>
+ </dependency>
+
+ <dependency>
+ <groupId>org.mortbay.jetty</groupId>
<artifactId>jetty-sslengine</artifactId>
- <scope>compile</scope>
</dependency>
</dependencies>
diff --git a/slider-core/src/main/java/org/apache/slider/Slider.java b/slider-core/src/main/java/org/apache/slider/Slider.java
index 0d25f00..5fc8618 100644
--- a/slider-core/src/main/java/org/apache/slider/Slider.java
+++ b/slider-core/src/main/java/org/apache/slider/Slider.java
@@ -42,7 +42,7 @@
//turn the args to a list
List<String> argsList = Arrays.asList(args);
//create a new list, as the ArrayList type doesn't push() on an insert
- List<String> extendedArgs = new ArrayList<>(argsList);
+ List<String> extendedArgs = new ArrayList<String>(argsList);
//insert the service name
extendedArgs.add(0, SERVICE_CLASSNAME);
//now have the service launcher do its work
diff --git a/slider-core/src/main/java/org/apache/slider/api/ClusterDescription.java b/slider-core/src/main/java/org/apache/slider/api/ClusterDescription.java
index d5869a6..d875d66 100644
--- a/slider-core/src/main/java/org/apache/slider/api/ClusterDescription.java
+++ b/slider-core/src/main/java/org/apache/slider/api/ClusterDescription.java
@@ -165,40 +165,40 @@
* the Slider AM and the application that it deploys
*/
public Map<String, String> options =
- new HashMap<>();
+ new HashMap<String, String>();
/**
* cluster information
* This is only valid when querying the cluster status.
*/
public Map<String, String> info =
- new HashMap<>();
+ new HashMap<String, String>();
/**
* Statistics. This is only relevant when querying the cluster status
*/
public Map<String, Map<String, Integer>> statistics =
- new HashMap<>();
+ new HashMap<String, Map<String, Integer>>();
/**
* Instances: role->count
*/
public Map<String, List<String>> instances =
- new HashMap<>();
+ new HashMap<String, List<String>>();
/**
* Role options,
* role -> option -> value
*/
public Map<String, Map<String, String>> roles =
- new HashMap<>();
+ new HashMap<String, Map<String, String>>();
/**
* List of key-value pairs to add to a client config to set up the client
*/
public Map<String, String> clientProperties =
- new HashMap<>();
+ new HashMap<String, String>();
/**
* Status information
@@ -569,7 +569,7 @@
*/
@JsonIgnore
public Set<String> getRoleNames() {
- return new HashSet<>(roles.keySet());
+ return new HashSet<String>(roles.keySet());
}
/**
diff --git a/slider-core/src/main/java/org/apache/slider/api/ClusterDescriptionOperations.java b/slider-core/src/main/java/org/apache/slider/api/ClusterDescriptionOperations.java
index 7e73a92..21ece2b 100644
--- a/slider-core/src/main/java/org/apache/slider/api/ClusterDescriptionOperations.java
+++ b/slider-core/src/main/java/org/apache/slider/api/ClusterDescriptionOperations.java
@@ -64,15 +64,15 @@
MapOperations appOptions =
aggregateConf.getAppConfOperations().getGlobalOptions();
- cd.type = internalOptions.getOption(OptionKeys.INTERNAL_PROVIDER_NAME,
+ cd.type = internalOptions.getOption(InternalKeys.INTERNAL_PROVIDER_NAME,
SliderProviderFactory.DEFAULT_CLUSTER_TYPE);
- cd.dataPath = internalOptions.get(OptionKeys.INTERNAL_DATA_DIR_PATH);
+ cd.dataPath = internalOptions.get(InternalKeys.INTERNAL_DATA_DIR_PATH);
cd.name = internalOptions.get(OptionKeys.APPLICATION_NAME);
- cd.originConfigurationPath = internalOptions.get(OptionKeys.INTERNAL_SNAPSHOT_CONF_PATH);
- cd.generatedConfigurationPath = internalOptions.get(OptionKeys.INTERNAL_GENERATED_CONF_PATH);
- cd.setImagePath(internalOptions.get(OptionKeys.INTERNAL_APPLICATION_IMAGE_PATH));
- cd.setApplicationHome(internalOptions.get(OptionKeys.INTERNAL_APPLICATION_HOME));
+ cd.originConfigurationPath = internalOptions.get(InternalKeys.INTERNAL_SNAPSHOT_CONF_PATH);
+ cd.generatedConfigurationPath = internalOptions.get(InternalKeys.INTERNAL_GENERATED_CONF_PATH);
+ cd.setImagePath(internalOptions.get(InternalKeys.INTERNAL_APPLICATION_IMAGE_PATH));
+ cd.setApplicationHome(internalOptions.get(InternalKeys.INTERNAL_APPLICATION_HOME));
cd.setZkPath(appOptions.get(ZOOKEEPER_PATH));
cd.setZkHosts(appOptions.get(ZOOKEEPER_QUORUM));
diff --git a/slider-core/src/main/java/org/apache/slider/api/InternalKeys.java b/slider-core/src/main/java/org/apache/slider/api/InternalKeys.java
new file mode 100644
index 0000000..ad384e2
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/api/InternalKeys.java
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.api;
+
+/**
+ * Keys for internal use, go into `internal.json` and not intended for normal
+ * use except when tuning Slider AM operations
+ */
+public interface InternalKeys {
+
+
+ /**
+ * Home dir of the app: {@value}
+ * If set, implies there is a home dir to use
+ */
+ String INTERNAL_APPLICATION_HOME = "internal.application.home";
+ /**
+ * Path to an image file containing the app: {@value}
+ */
+ String INTERNAL_APPLICATION_IMAGE_PATH = "internal.application.image.path";
+ /**
+ * Time in milliseconds to wait after forking any in-AM
+ * process before attempting to start up the containers: {@value}
+ *
+ * A shorter value brings the cluster up faster, but means that if the
+ * in AM process fails (due to a bad configuration), then time
+ * is wasted starting containers on a cluster that isn't going to come
+ * up
+ */
+ String INTERNAL_CONTAINER_STARTUP_DELAY = "internal.container.startup.delay";
+ /**
+ * internal temp directory: {@value}
+ */
+ String INTERNAL_AM_TMP_DIR = "internal.tmp.dir";
+ /**
+ * where a snapshot of the original conf dir is: {@value}
+ */
+ String INTERNAL_SNAPSHOT_CONF_PATH = "internal.snapshot.conf.path";
+ /**
+   * where the generated configuration directory is: {@value}
+ */
+ String INTERNAL_GENERATED_CONF_PATH = "internal.generated.conf.path";
+ /**
+   * the name of the provider: {@value}
+ */
+ String INTERNAL_PROVIDER_NAME = "internal.provider.name";
+ /**
+   * the path to the data directory: {@value}
+ */
+ String INTERNAL_DATA_DIR_PATH = "internal.data.dir.path";
+ /**
+ * Time in milliseconds to wait after forking any in-AM
+ * process before attempting to start up the containers: {@value}
+ *
+ * A shorter value brings the cluster up faster, but means that if the
+ * in AM process fails (due to a bad configuration), then time
+ * is wasted starting containers on a cluster that isn't going to come
+ * up
+ */
+ int DEFAULT_INTERNAL_CONTAINER_STARTUP_DELAY = 5000;
+ /**
+   * Location of the keytab: {@value}
+ */
+ String KEYTAB_LOCATION = "internal.keytab.location";
+
+
+ /**
+ * Flag to indicate whether or not the chaos monkey is enabled:
+ * {@value}
+ */
+ String CHAOS_MONKEY_ENABLED = "internal.chaos.monkey.enabled";
+ boolean DEFAULT_CHAOS_MONKEY_ENABLED = false;
+
+
+ /**
+ * Rate
+ */
+
+ String CHAOS_MONKEY_INTERVAL = "internal.chaos.monkey.interval";
+ String CHAOS_MONKEY_INTERVAL_DAYS = CHAOS_MONKEY_INTERVAL + ".days";
+ String CHAOS_MONKEY_INTERVAL_HOURS = CHAOS_MONKEY_INTERVAL + ".hours";
+ String CHAOS_MONKEY_INTERVAL_MINUTES = CHAOS_MONKEY_INTERVAL + ".minutes";
+ String CHAOS_MONKEY_INTERVAL_SECONDS = CHAOS_MONKEY_INTERVAL + ".seconds";
+
+ int DEFAULT_CHAOS_MONKEY_INTERVAL_DAYS = 0;
+ int DEFAULT_CHAOS_MONKEY_INTERVAL_HOURS = 0;
+ int DEFAULT_CHAOS_MONKEY_INTERVAL_MINUTES = 0;
+
+ /**
+ * Prefix for all chaos monkey probabilities
+ */
+ String CHAOS_MONKEY_PROBABILITY =
+ "internal.chaos.monkey.probability";
+ /**
+   * Probabilities are out of 10000 ; 100==1%
+ */
+
+ /**
+ * Probability of a monkey check killing the AM: {@value}
+ */
+ String CHAOS_MONKEY_PROBABILITY_AM_FAILURE = CHAOS_MONKEY_PROBABILITY +".amfailure";
+
+ /**
+ * Default probability of a monkey check killing the AM: {@value}
+ */
+ int DEFAULT_CHAOS_MONKEY_PROBABILITY_AM_FAILURE = 0;
+
+ /**
+ * Probability of a monkey check killing a container: {@value}
+ */
+
+ String CHAOS_MONKEY_PROBABILITY_CONTAINER_FAILURE =
+ CHAOS_MONKEY_PROBABILITY + ".containerfailure";
+
+ /**
+   * Default probability of a monkey check killing a container: {@value}
+ */
+ int DEFAULT_CHAOS_MONKEY_PROBABILITY_CONTAINER_FAILURE = 0;
+
+
+}
diff --git a/slider-core/src/main/java/org/apache/slider/api/OptionKeys.java b/slider-core/src/main/java/org/apache/slider/api/OptionKeys.java
index 048fefa..a035a99 100644
--- a/slider-core/src/main/java/org/apache/slider/api/OptionKeys.java
+++ b/slider-core/src/main/java/org/apache/slider/api/OptionKeys.java
@@ -22,31 +22,9 @@
* Keys for entries in the <code>options</code> section
* of a cluster description.
*/
-public interface OptionKeys {
+public interface OptionKeys extends InternalKeys {
/**
- * Home dir of the app: {@value}
- * If set, implies there is a home dir to use
- */
- String INTERNAL_APPLICATION_HOME = "internal.application.home";
-
- /**
- * Path to an image file containing the app: {@value}
- */
- String INTERNAL_APPLICATION_IMAGE_PATH = "internal.application.image.path";
-
- /**
- * Time in milliseconds to wait after forking any in-AM
- * process before attempting to start up the containers: {@value}
- *
- * A shorter value brings the cluster up faster, but means that if the
- * in AM process fails (due to a bad configuration), then time
- * is wasted starting containers on a cluster that isn't going to come
- * up
- */
- String INTERNAL_CONTAINER_STARTUP_DELAY = "internal.container.startup.delay";
-
- /**
* Time in milliseconds to wait after forking any in-AM
* process before attempting to start up the containers: {@value}
*
@@ -57,82 +35,15 @@
*/
String APPLICATION_TYPE = "application.type";
- /**
- * Time in milliseconds to wait after forking any in-AM
- * process before attempting to start up the containers: {@value}
- *
- * A shorter value brings the cluster up faster, but means that if the
- * in AM process fails (due to a bad configuration), then time
- * is wasted starting containers on a cluster that isn't going to come
- * up
- */
String APPLICATION_NAME = "application.name";
/**
- * Time in milliseconds before a container is considered long-lived.
- * Shortlived containers are interpreted as a problem with the role
- * and/or the host: {@value}
- */
- String INTERNAL_CONTAINER_FAILURE_SHORTLIFE = "internal.container.failure.shortlife";
-
- /**
- * Default short life threshold: {@value}
- */
- int DEFAULT_CONTAINER_FAILURE_SHORTLIFE = 60;
-
- /**
- * maximum number of failed containers (in a single role)
- * before the cluster is deemed to have failed {@value}
- */
- String INTERNAL_CONTAINER_FAILURE_THRESHOLD = "internal.container.failure.threshold";
-
- /**
- * Default failure threshold: {@value}
- */
- int DEFAULT_CONTAINER_FAILURE_THRESHOLD = 5;
-
- /**
- * delay for container startup:{@value}
- */
- int DEFAULT_CONTAINER_STARTUP_DELAY = 5000;
-
- /**
- * Version of the app: {@value}
- */
- String KEYTAB_LOCATION = "internal.keytab.location";
-
- /**
* Prefix for site.xml options: {@value}
*/
String SITE_XML_PREFIX = "site.";
- /**
- * internal temp directory: {@value}
- */
- String INTERNAL_AM_TMP_DIR = "internal.tmp.dir";
/**
- * where a snapshot of the original conf dir is: {@value}
- */
- String INTERNAL_SNAPSHOT_CONF_PATH = "internal.snapshot.conf.path";
-
- /**
- * where a snapshot of the original conf dir is: {@value}
- */
- String INTERNAL_GENERATED_CONF_PATH = "internal.generated.conf.path";
-
- /**
- * where a snapshot of the original conf dir is: {@value}
- */
- String INTERNAL_PROVIDER_NAME = "internal.provider.name";
-
-
- /**
- * where a snapshot of the original conf dir is: {@value}
- */
- String INTERNAL_DATA_DIR_PATH = "internal.data.dir.path";
-
- /**
* Zookeeper quorum host list: {@value}
*/
String ZOOKEEPER_QUORUM = "zookeeper.quorum";
diff --git a/slider-core/src/main/java/org/apache/slider/api/ResourceKeys.java b/slider-core/src/main/java/org/apache/slider/api/ResourceKeys.java
index 1c914cb..3d54140 100644
--- a/slider-core/src/main/java/org/apache/slider/api/ResourceKeys.java
+++ b/slider-core/src/main/java/org/apache/slider/api/ResourceKeys.java
@@ -20,6 +20,25 @@
/**
* These are the keys valid in resource options
+ *
+ /*
+
+ Container failure window.
+
+ The window is calculated in minutes as (days * 24 * 60 + hours * 60 + minutes)
+
+ Every interval of this period after the AM is started/restarted becomes
+ the time period in which the CONTAINER_FAILURE_THRESHOLD value is calculated.
+
+ After the window limit is reached, the failure counts are reset. This
+ is not a sliding window/moving average policy, simply a rule such as
+ "every six hours the failure count is reset"
+
+
+ <pre>
+ ===========================================================================
+ </pre>
+
*/
public interface ResourceKeys {
@@ -69,4 +88,47 @@
* placement policy
*/
String COMPONENT_PLACEMENT_POLICY = "yarn.component.placement.policy";
+
+
+
+ /**
+ * Time in seconds before a container is considered long-lived.
+ * Shortlived containers are interpreted as a problem with the role
+ * and/or the host: {@value}
+ */
+ String CONTAINER_FAILURE_SHORTLIFE =
+ "container.failure.shortlife";
+
+ /**
+ * Default short life threshold: {@value}
+ */
+ int DEFAULT_CONTAINER_FAILURE_SHORTLIFE = 60;
+
+ /**
+ * maximum number of failed containers (in a single role)
+ * before the cluster is deemed to have failed {@value}
+ */
+ String CONTAINER_FAILURE_THRESHOLD =
+ "yarn.container.failure.threshold";
+
+ /**
+ * prefix for the time of the container failure reset window.
+ * {@value}
+ */
+
+ String CONTAINER_FAILURE_WINDOW =
+ "yarn.container.failure.window";
+
+
+
+ int DEFAULT_CONTAINER_FAILURE_WINDOW_DAYS = 0;
+ int DEFAULT_CONTAINER_FAILURE_WINDOW_HOURS = 6;
+ int DEFAULT_CONTAINER_FAILURE_WINDOW_MINUTES = 0;
+
+
+ /**
+ * Default failure threshold: {@value}
+ */
+ int DEFAULT_CONTAINER_FAILURE_THRESHOLD = 5;
+
}
diff --git a/slider-core/src/main/java/org/apache/slider/client/SliderClient.java b/slider-core/src/main/java/org/apache/slider/client/SliderClient.java
index e762c1e..93f6207 100644
--- a/slider-core/src/main/java/org/apache/slider/client/SliderClient.java
+++ b/slider-core/src/main/java/org/apache/slider/client/SliderClient.java
@@ -35,6 +35,7 @@
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.slider.api.ClusterDescription;
import org.apache.slider.api.ClusterNode;
+import org.apache.slider.api.InternalKeys;
import org.apache.slider.api.OptionKeys;
import org.apache.slider.api.ResourceKeys;
import org.apache.slider.api.SliderClusterProtocol;
@@ -182,7 +183,7 @@
Configuration clientConf = SliderUtils.loadClientConfigurationResource();
ConfigHelper.mergeConfigurations(conf, clientConf, CLIENT_RESOURCE);
serviceArgs.applyDefinitions(conf);
- serviceArgs.applyFileSystemURL(conf);
+ serviceArgs.applyFileSystemBinding(conf);
// init security with our conf
if (SliderUtils.isHadoopClusterSecure(conf)) {
SliderUtils.forceLogin();
@@ -205,6 +206,8 @@
* @return the exit code
* @throws Throwable anything that went wrong
*/
+/* JDK7
+
@Override
public int runService() throws Throwable {
@@ -217,6 +220,9 @@
case ACTION_BUILD:
exitCode = actionBuild(clusterName, serviceArgs.getActionBuildArgs());
break;
+ case ACTION_UPDATE:
+ exitCode = actionUpdate(clusterName, serviceArgs.getActionUpdateArgs());
+ break;
case ACTION_CREATE:
exitCode = actionCreate(clusterName, serviceArgs.getActionCreateArgs());
break;
@@ -274,6 +280,66 @@
return exitCode;
}
+*/
+ @Override
+ public int runService() throws Throwable {
+
+ // choose the action
+ String action = serviceArgs.getAction();
+ int exitCode = EXIT_SUCCESS;
+ String clusterName = serviceArgs.getClusterName();
+ // actions
+ if (ACTION_BUILD.equals(action)) {
+ exitCode = actionBuild(clusterName, serviceArgs.getActionBuildArgs());
+ } else if (ACTION_CREATE.equals(action)) {
+ exitCode = actionCreate(clusterName, serviceArgs.getActionCreateArgs());
+ } else if (ACTION_FREEZE.equals(action)) {
+ exitCode = actionFreeze(clusterName,
+ serviceArgs.getActionFreezeArgs());
+ } else if (ACTION_THAW.equals(action)) {
+ exitCode = actionThaw(clusterName, serviceArgs.getActionThawArgs());
+ } else if (ACTION_DESTROY.equals(action)) {
+ exitCode = actionDestroy(clusterName);
+ } else if (ACTION_EXISTS.equals(action)) {
+ exitCode = actionExists(clusterName,
+ serviceArgs.getActionExistsArgs().live);
+ } else if (ACTION_FLEX.equals(action)) {
+ exitCode = actionFlex(clusterName, serviceArgs.getActionFlexArgs());
+ } else if (ACTION_GETCONF.equals(action)) {
+ exitCode = actionGetConf(clusterName, serviceArgs.getActionGetConfArgs());
+ } else if (ACTION_HELP.equals(action) ||
+ ACTION_USAGE.equals(action)) {
+ log.info(serviceArgs.usage());
+
+ } else if (ACTION_KILL_CONTAINER.equals(action)) {
+ exitCode = actionKillContainer(clusterName,
+ serviceArgs.getActionKillContainerArgs());
+
+ } else if (ACTION_AM_SUICIDE.equals(action)) {
+ exitCode = actionAmSuicide(clusterName,
+ serviceArgs.getActionAMSuicideArgs());
+
+ } else if (ACTION_LIST.equals(action)) {
+ exitCode = actionList(clusterName);
+ } else if (ACTION_REGISTRY.equals(action)) {
+ exitCode = actionRegistry(
+ serviceArgs.getActionRegistryArgs());
+ } else if (ACTION_STATUS.equals(action)) {
+ exitCode = actionStatus(clusterName,
+ serviceArgs.getActionStatusArgs());
+ } else if (ACTION_UPDATE.equals(action)) {
+ exitCode = actionUpdate(clusterName, serviceArgs.getActionUpdateArgs());
+
+ } else if (ACTION_VERSION.equals(action)) {
+
+ exitCode = actionVersion();
+ } else {
+ throw new SliderException(EXIT_UNIMPLEMENTED,
+ "Unimplemented: " + action);
+ }
+
+ return exitCode;
+ }
/**
* Delete the zookeeper node associated with the calling user and the cluster
**/
@@ -327,9 +393,12 @@
client.createPath(zkPath, "", ZooDefs.Ids.OPEN_ACL_UNSAFE,
CreateMode.PERSISTENT);
return zkPath;
+
+ //JDK7
+// } catch (InterruptedException | KeeperException e) {
} catch (InterruptedException e) {
log.warn("Unable to create zk node {}", zkPath, e);
- } catch (KeeperException e) {
+ } catch ( KeeperException e) {
log.warn("Unable to create zk node {}", zkPath, e);
}
}
@@ -460,27 +529,42 @@
YarnException,
IOException {
- buildInstanceDefinition(clustername, buildInfo);
+ buildInstanceDefinition(clustername, buildInfo, false, false);
return EXIT_SUCCESS;
}
+ /**
+ * Update the cluster specification
+ *
+ * @param clustername cluster name
+ * @param buildInfo the arguments needed to update the cluster
+ * @throws YarnException Yarn problems
+ * @throws IOException other problems
+ */
+ public int actionUpdate(String clustername, AbstractClusterBuildingActionArgs buildInfo) throws
+ YarnException, IOException {
+ buildInstanceDefinition(clustername, buildInfo, true, true);
+ return EXIT_SUCCESS;
+ }
/**
* Build up the AggregateConfiguration for an application instance then
* persists it
* @param clustername name of the cluster
* @param buildInfo the arguments needed to build the cluster
+ * @param overwrite true if existing cluster directory can be overwritten
+ * @param liveClusterAllowed true if live cluster can be modified
* @throws YarnException
* @throws IOException
*/
public void buildInstanceDefinition(String clustername,
- AbstractClusterBuildingActionArgs buildInfo)
+ AbstractClusterBuildingActionArgs buildInfo, boolean overwrite, boolean liveClusterAllowed)
throws YarnException, IOException {
// verify that a live cluster isn't there
SliderUtils.validateClusterName(clustername);
verifyBindingsDefined();
- verifyNoLiveClusters(clustername);
+ if (!liveClusterAllowed) verifyNoLiveClusters(clustername);
Configuration conf = getConfig();
String registryQuorum = lookupZKQuorum();
@@ -570,6 +654,7 @@
// resource component args
appConf.merge(cmdLineResourceOptions);
+ resources.merge(cmdLineResourceOptions);
resources.mergeComponents(buildInfo.getResourceCompOptionMap());
builder.init(providerName, instanceDefinition);
@@ -629,7 +714,7 @@
throw e;
}
try {
- builder.persist(appconfdir);
+ builder.persist(appconfdir, overwrite);
} catch (LockAcquireFailedException e) {
log.warn("Failed to get a Lock on {} : {}", builder, e);
throw new BadClusterStateException("Failed to save " + clustername
@@ -737,9 +822,6 @@
return instanceDefinition;
}
-
-
-
/**
*
@@ -779,16 +861,16 @@
instanceDefinition.getAppConfOperations();
Path generatedConfDirPath =
createPathThatMustExist(internalOptions.getMandatoryOption(
- OptionKeys.INTERNAL_GENERATED_CONF_PATH));
+ InternalKeys.INTERNAL_GENERATED_CONF_PATH));
Path snapshotConfPath =
createPathThatMustExist(internalOptions.getMandatoryOption(
- OptionKeys.INTERNAL_SNAPSHOT_CONF_PATH));
+ InternalKeys.INTERNAL_SNAPSHOT_CONF_PATH));
// cluster Provider
AbstractClientProvider provider = createClientProvider(
internalOptions.getMandatoryOption(
- OptionKeys.INTERNAL_PROVIDER_NAME));
+ InternalKeys.INTERNAL_PROVIDER_NAME));
// make sure the conf dir is valid;
// now build up the image path
@@ -817,8 +899,8 @@
// set the application name;
amLauncher.setKeepContainersOverRestarts(true);
- amLauncher.setMaxAppAttempts(config.getInt(KEY_AM_RESTART_LIMIT,
- DEFAULT_AM_RESTART_LIMIT));
+ int maxAppAttempts = config.getInt(KEY_AM_RESTART_LIMIT, 0);
+ amLauncher.setMaxAppAttempts(maxAppAttempts);
sliderFileSystem.purgeAppInstanceTempFiles(clustername);
Path tempPath = sliderFileSystem.createAppInstanceTempPath(
@@ -990,8 +1072,8 @@
commandLine.add(Arguments.ARG_RM_ADDR, rmAddr);
}
- if (serviceArgs.getFilesystemURL() != null) {
- commandLine.add(Arguments.ARG_FILESYSTEM, serviceArgs.getFilesystemURL());
+ if (serviceArgs.getFilesystemBinding() != null) {
+ commandLine.add(Arguments.ARG_FILESYSTEM, serviceArgs.getFilesystemBinding());
}
addConfOptionToCLI(commandLine, config, REGISTRY_PATH,
@@ -1366,7 +1448,7 @@
verifyBindingsDefined();
SliderUtils.validateClusterName(name);
log.debug("actionFlex({})", name);
- Map<String, Integer> roleInstances = new HashMap<>();
+ Map<String, Integer> roleInstances = new HashMap<String, Integer>();
Map<String, String> roleMap = args.getComponentMap();
for (Map.Entry<String, String> roleEntry : roleMap.entrySet()) {
String key = roleEntry.getKey();
@@ -1661,7 +1743,12 @@
return EXIT_FALSE;
}
}
- } catch (YarnException | IOException e) {
+
+// JDK7 } catch (YarnException | IOException e) {
+ } catch (YarnException e) {
+ log.warn("Exception while waiting for the cluster {} to shut down: {}",
+ clustername, e);
+ } catch ( IOException e) {
log.warn("Exception while waiting for the cluster {} to shut down: {}",
clustername, e);
}
@@ -1725,6 +1812,8 @@
}
try {
String description = "Slider Application Instance " + clustername;
+// JDK7
+/*
switch (format) {
case Arguments.FORMAT_XML:
Configuration siteConf = getSiteConf(status, clustername);
@@ -1738,6 +1827,17 @@
default:
throw new BadCommandArgumentsException("Unknown format: " + format);
}
+*/
+ if (Arguments.FORMAT_XML.equals(format)) {
+ Configuration siteConf = getSiteConf(status, clustername);
+ siteConf.writeXml(writer);
+ } else if (Arguments.FORMAT_PROPERTIES.equals(format)) {
+ Properties props = new Properties();
+ props.putAll(status.clientProperties);
+ props.store(writer, description);
+ } else {
+ throw new BadCommandArgumentsException("Unknown format: " + format);
+ }
} finally {
// data is written.
// close the file
@@ -1841,10 +1941,8 @@
* @throws IOException any problems loading -including a missing file
*/
@VisibleForTesting
- public AggregateConf loadPersistedClusterDescription(String clustername) throws
- IOException,
- SliderException,
- LockAcquireFailedException {
+ public AggregateConf loadPersistedClusterDescription(String clustername)
+ throws IOException, SliderException, LockAcquireFailedException {
Path clusterDirectory = sliderFileSystem.buildClusterDirPath(clustername);
ConfPersister persister = new ConfPersister(sliderFileSystem, clusterDirectory);
AggregateConf instanceDescription = new AggregateConf();
@@ -1921,7 +2019,7 @@
if (uuids.length == 0) {
// short cut on an empty list
- return new LinkedList<>();
+ return new LinkedList<ClusterNode>();
}
return createClusterOperations().listClusterNodes(uuids);
}
@@ -2120,7 +2218,7 @@
+ serviceType
+ " name " + name);
}
- List<ServiceInstanceData> sids = new ArrayList<>(size);
+ List<ServiceInstanceData> sids = new ArrayList<ServiceInstanceData>(size);
for (CuratorServiceInstance<ServiceInstanceData> instance : instances) {
ServiceInstanceData payload = instance.payload;
logInstance(payload, registryArgs.verbose);
@@ -2305,7 +2403,10 @@
try {
maybeStartRegistry();
return registry.instanceIDs(SliderKeys.APP_TYPE);
- } catch (YarnException | IOException e) {
+/// JDK7 } catch (YarnException | IOException e) {
+ } catch (IOException e) {
+ throw e;
+ } catch (YarnException e) {
throw e;
} catch (Exception e) {
throw new IOException(e);
diff --git a/slider-core/src/main/java/org/apache/slider/client/SliderYarnClientImpl.java b/slider-core/src/main/java/org/apache/slider/client/SliderYarnClientImpl.java
index e7b492b..3151a09 100644
--- a/slider-core/src/main/java/org/apache/slider/client/SliderYarnClientImpl.java
+++ b/slider-core/src/main/java/org/apache/slider/client/SliderYarnClientImpl.java
@@ -70,10 +70,10 @@
*/
public List<ApplicationReport> listInstances(String user)
throws YarnException, IOException {
- Set<String> types = new HashSet<>(1);
+ Set<String> types = new HashSet<String>(1);
types.add(SliderKeys.APP_TYPE);
List<ApplicationReport> allApps = getApplications(types);
- List<ApplicationReport> results = new ArrayList<>();
+ List<ApplicationReport> results = new ArrayList<ApplicationReport>();
for (ApplicationReport report : allApps) {
if (StringUtils.isEmpty(user) || user.equals(report.getUser())) {
results.add(report);
@@ -97,7 +97,7 @@
YarnException {
List<ApplicationReport> instances = listInstances(user);
List<ApplicationReport> results =
- new ArrayList<>(instances.size());
+ new ArrayList<ApplicationReport>(instances.size());
for (ApplicationReport report : instances) {
if (report.getName().equals(appname)) {
results.add(report);
@@ -240,7 +240,7 @@
IOException {
List<ApplicationReport> instances = listInstances(user);
List<ApplicationReport> results =
- new ArrayList<>(instances.size());
+ new ArrayList<ApplicationReport>(instances.size());
for (ApplicationReport app : instances) {
if (app.getName().equals(appname)
&& isApplicationLive(app)) {
diff --git a/slider-core/src/main/java/org/apache/slider/common/SliderExitCodes.java b/slider-core/src/main/java/org/apache/slider/common/SliderExitCodes.java
index 79b77dc..b115d98 100644
--- a/slider-core/src/main/java/org/apache/slider/common/SliderExitCodes.java
+++ b/slider-core/src/main/java/org/apache/slider/common/SliderExitCodes.java
@@ -73,7 +73,7 @@
int EXIT_PROCESS_FAILED = 72;
/**
- * The cluster failed -too many containers were
+ * The instance failed -too many containers were
* failing or some other threshold was reached
*/
int EXIT_DEPLOYMENT_FAILED = 73;
diff --git a/slider-core/src/main/java/org/apache/slider/common/SliderKeys.java b/slider-core/src/main/java/org/apache/slider/common/SliderKeys.java
index 0ba562a..38f55c2 100644
--- a/slider-core/src/main/java/org/apache/slider/common/SliderKeys.java
+++ b/slider-core/src/main/java/org/apache/slider/common/SliderKeys.java
@@ -50,7 +50,7 @@
/**
* Application type for YARN {@value}
*/
- String APP_TYPE = "org.apache.slider";
+ String APP_TYPE = "org-apache-slider";
/**
* JVM arg to force IPv4 {@value}
@@ -167,4 +167,9 @@
String PASSPHRASE = "DEV";
String PASS_LEN = "50";
String KEYSTORE_LOCATION = "ssl.server.keystore.location";
+
+ /**
+ * Python specific
+ */
+ String PYTHONPATH = "PYTHONPATH";
}
diff --git a/slider-core/src/main/java/org/apache/slider/common/SliderXMLConfKeysForTesting.java b/slider-core/src/main/java/org/apache/slider/common/SliderXMLConfKeysForTesting.java
index 6d3fe38..e31cfb6 100644
--- a/slider-core/src/main/java/org/apache/slider/common/SliderXMLConfKeysForTesting.java
+++ b/slider-core/src/main/java/org/apache/slider/common/SliderXMLConfKeysForTesting.java
@@ -71,4 +71,9 @@
*/
String KEY_TEST_YARN_RAM_REQUEST = "slider.test.yarn.ram";
String DEFAULT_YARN_RAM_REQUEST = "192";
+
+ /**
+ * security related keys
+ */
+ String TEST_SECURITY_DIR = "/tmp/work/security";
}
diff --git a/slider-core/src/main/java/org/apache/slider/common/SliderXmlConfKeys.java b/slider-core/src/main/java/org/apache/slider/common/SliderXmlConfKeys.java
index 3f16f25..1bbe9ae 100644
--- a/slider-core/src/main/java/org/apache/slider/common/SliderXmlConfKeys.java
+++ b/slider-core/src/main/java/org/apache/slider/common/SliderXmlConfKeys.java
@@ -84,19 +84,6 @@
String KEY_AM_RESTART_LIMIT = "slider.yarn.restart.limit";
/**
- * Default Limit on restarts for the AM
- * {@value}
- */
- int DEFAULT_AM_RESTART_LIMIT = 2;
-
- /**
- * Flag which is set to indicate that security should be enabled
- * when talking to this cluster.
- */
- String KEY_SECURITY =
- CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION;
-
- /**
* queue name
*/
String KEY_YARN_QUEUE = "slider.yarn.queue";
diff --git a/slider-core/src/main/java/org/apache/slider/common/params/AbstractActionArgs.java b/slider-core/src/main/java/org/apache/slider/common/params/AbstractActionArgs.java
index 44bc239..f4a4569 100644
--- a/slider-core/src/main/java/org/apache/slider/common/params/AbstractActionArgs.java
+++ b/slider-core/src/main/java/org/apache/slider/common/params/AbstractActionArgs.java
@@ -25,7 +25,6 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.net.URI;
import java.util.ArrayList;
import java.util.List;
@@ -41,25 +40,22 @@
}
/**
- * URI of the filesystem
+ * URI/binding to the filesystem
*/
@Parameter(names = {ARG_FILESYSTEM, ARG_FILESYSTEM_LONG},
- description = "Filesystem URI",
- converter = URIArgumentConverter.class)
- public URI filesystemURL;
+ description = "Filesystem Binding")
+ public String filesystemBinding;
@Parameter(names = {ARG_BASE_PATH},
description = "Slider base path on the filesystem",
converter = PathArgumentConverter.class)
public Path basePath;
-
/**
* This is the default parameter
*/
@Parameter
- public final List<String> parameters = new ArrayList<>();
-
+ public final List<String> parameters = new ArrayList<String>();
/**
* get the name: relies on arg 1 being the cluster name in all operations
@@ -80,7 +76,7 @@
*/
@Parameter(names = ARG_DEFINE, arity = 1, description = "Definitions")
- public final List<String> definitions = new ArrayList<>();
+ public final List<String> definitions = new ArrayList<String>();
/**
* System properties
@@ -88,11 +84,11 @@
@Parameter(names = {ARG_SYSPROP}, arity = 1,
description = "system properties in the form name value" +
" These are set after the JVM is started.")
- public final List<String> sysprops = new ArrayList<>(0);
+ public final List<String> sysprops = new ArrayList<String>(0);
@Parameter(names = {ARG_MANAGER_SHORT, ARG_MANAGER},
- description = "hostname:port of the YARN resource manager")
+ description = "Binding (usually hostname:port) of the YARN resource manager")
public String manager;
@@ -110,7 +106,7 @@
/**
* Get the name of the action
- * @return
+ * @return the action name
*/
public abstract String getActionName() ;
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AbstractRMOperation.java b/slider-core/src/main/java/org/apache/slider/common/params/ActionUpdateArgs.java
similarity index 68%
copy from slider-core/src/main/java/org/apache/slider/server/appmaster/state/AbstractRMOperation.java
copy to slider-core/src/main/java/org/apache/slider/common/params/ActionUpdateArgs.java
index e3e595f..9d76bd8 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AbstractRMOperation.java
+++ b/slider-core/src/main/java/org/apache/slider/common/params/ActionUpdateArgs.java
@@ -16,16 +16,17 @@
* limitations under the License.
*/
-package org.apache.slider.server.appmaster.state;
+package org.apache.slider.common.params;
-public class AbstractRMOperation {
+import com.beust.jcommander.Parameters;
- /**
- * Execute the operation
- * @param asyncRMClient client
- */
- public void execute(RMOperationHandler handler) {
+@Parameters(commandNames = {SliderActions.ACTION_UPDATE},
+ commandDescription = SliderActions.DESCRIBE_ACTION_UPDATE)
+public class ActionUpdateArgs extends AbstractClusterBuildingActionArgs {
+
+ @Override
+ public String getActionName() {
+ return SliderActions.ACTION_UPDATE;
}
-
}
diff --git a/slider-core/src/main/java/org/apache/slider/common/params/AppAndResouceOptionArgsDelegate.java b/slider-core/src/main/java/org/apache/slider/common/params/AppAndResouceOptionArgsDelegate.java
index 248e4c2..1f07de3 100644
--- a/slider-core/src/main/java/org/apache/slider/common/params/AppAndResouceOptionArgsDelegate.java
+++ b/slider-core/src/main/java/org/apache/slider/common/params/AppAndResouceOptionArgsDelegate.java
@@ -37,7 +37,7 @@
@Parameter(names = {ARG_OPTION, ARG_OPTION_SHORT}, arity = 2,
description = ARG_OPTION + "<name> <value>",
splitter = DontSplitArguments.class)
- public List<String> optionTuples = new ArrayList<>(0);
+ public List<String> optionTuples = new ArrayList<String>(0);
/**
@@ -47,7 +47,7 @@
description = "Component option " + ARG_COMP_OPT +
" <component> <name> <option>",
splitter = DontSplitArguments.class)
- public List<String> compOptTriples = new ArrayList<>(0);
+ public List<String> compOptTriples = new ArrayList<String>(0);
/**
* Resource Options
@@ -55,7 +55,7 @@
@Parameter(names = {ARG_RESOURCE_OPT, ARG_RESOURCE_OPT_SHORT}, arity = 2,
description = "Resource option "+ ARG_RESOURCE_OPT + "<name> <value>",
splitter = DontSplitArguments.class)
- public List<String> resOptionTuples = new ArrayList<>(0);
+ public List<String> resOptionTuples = new ArrayList<String>(0);
/**
@@ -65,7 +65,7 @@
description = "Component resource option " + ARG_RES_COMP_OPT +
" <component> <name> <option>",
splitter = DontSplitArguments.class)
- public List<String> resCompOptTriples = new ArrayList<>(0);
+ public List<String> resCompOptTriples = new ArrayList<String>(0);
public Map<String, String> getOptionsMap() throws
diff --git a/slider-core/src/main/java/org/apache/slider/common/params/ArgOps.java b/slider-core/src/main/java/org/apache/slider/common/params/ArgOps.java
index 0837dd2..83754b3 100644
--- a/slider-core/src/main/java/org/apache/slider/common/params/ArgOps.java
+++ b/slider-core/src/main/java/org/apache/slider/common/params/ArgOps.java
@@ -44,7 +44,7 @@
* create a 3-tuple
*/
public static List<Object> triple(String msg, int min, int max) {
- List<Object> l = new ArrayList<>(3);
+ List<Object> l = new ArrayList<Object>(3);
l.add(msg);
l.add(min);
l.add(max);
@@ -58,11 +58,12 @@
return triple(msg, min, min);
}
- public static void applyFileSystemURL(URI filesystemURL, Configuration conf) {
- if (filesystemURL != null) {
+ public static void applyFileSystemBinding(String filesystemBinding,
+ Configuration conf) {
+ if (filesystemBinding != null) {
//filesystem argument was set -this overwrites any defaults in the
//configuration
- FileSystem.setDefaultUri(conf, filesystemURL);
+ FileSystem.setDefaultUri(conf, filesystemBinding);
}
}
@@ -99,7 +100,7 @@
public static Map<String, String> convertTupleListToMap(String description,
List<String> list) throws
BadCommandArgumentsException {
- Map<String, String> results = new HashMap<>();
+ Map<String, String> results = new HashMap<String, String>();
if (list != null && !list.isEmpty()) {
int size = list.size();
if (size % 2 != 0) {
@@ -133,7 +134,7 @@
List<String> list) throws
BadCommandArgumentsException {
Map<String, Map<String, String>> results =
- new HashMap<>();
+ new HashMap<String, Map<String, String>>();
if (list != null && !list.isEmpty()) {
int size = list.size();
if (size % 3 != 0) {
@@ -148,7 +149,7 @@
Map<String, String> roleMap = results.get(role);
if (roleMap == null) {
//demand create new role map
- roleMap = new HashMap<>();
+ roleMap = new HashMap<String, String>();
results.put(role, roleMap);
}
if (roleMap.get(key) != null) {
diff --git a/slider-core/src/main/java/org/apache/slider/common/params/ClientArgs.java b/slider-core/src/main/java/org/apache/slider/common/params/ClientArgs.java
index 44a2a7a..ca854f1 100644
--- a/slider-core/src/main/java/org/apache/slider/common/params/ClientArgs.java
+++ b/slider-core/src/main/java/org/apache/slider/common/params/ClientArgs.java
@@ -46,6 +46,7 @@
private AbstractClusterBuildingActionArgs buildingActionArgs;
private final ActionAMSuicideArgs actionAMSuicideArgs = new ActionAMSuicideArgs();
private final ActionBuildArgs actionBuildArgs = new ActionBuildArgs();
+ private final ActionUpdateArgs actionUpdateArgs = new ActionUpdateArgs();
private final ActionCreateArgs actionCreateArgs = new ActionCreateArgs();
private final ActionDestroyArgs actionDestroyArgs = new ActionDestroyArgs();
private final ActionExistsArgs actionExistsArgs = new ActionExistsArgs();
@@ -77,6 +78,7 @@
actionAMSuicideArgs,
actionBuildArgs,
actionCreateArgs,
+ actionUpdateArgs,
actionDestroyArgs,
actionExistsArgs,
actionFlexArgs,
@@ -101,9 +103,10 @@
log.debug("Setting RM to {}", getManager());
conf.set(YarnConfiguration.RM_ADDRESS, getManager());
}
- if ( getBasePath() != null ) {
+ if (getBasePath() != null) {
log.debug("Setting basePath to {}", getBasePath());
- conf.set(SliderXmlConfKeys.KEY_SLIDER_BASE_PATH, getBasePath().toString());
+ conf.set(SliderXmlConfKeys.KEY_SLIDER_BASE_PATH,
+ getBasePath().toString());
}
}
@@ -119,6 +122,10 @@
return actionBuildArgs;
}
+ public ActionUpdateArgs getActionUpdateArgs() {
+ return actionUpdateArgs;
+ }
+
public ActionCreateArgs getActionCreateArgs() {
return actionCreateArgs;
}
@@ -182,6 +189,9 @@
//its a builder, so set those actions too
buildingActionArgs = actionCreateArgs;
+ } else if (SliderActions.ACTION_UPDATE.equals(action)) {
+ bindCoreAction(actionUpdateArgs);
+
} else if (SliderActions.ACTION_FREEZE.equals(action)) {
bindCoreAction(actionFreezeArgs);
diff --git a/slider-core/src/main/java/org/apache/slider/common/params/CommonArgs.java b/slider-core/src/main/java/org/apache/slider/common/params/CommonArgs.java
index 4cee1d1..5d94182 100644
--- a/slider-core/src/main/java/org/apache/slider/common/params/CommonArgs.java
+++ b/slider-core/src/main/java/org/apache/slider/common/params/CommonArgs.java
@@ -30,7 +30,6 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.net.URI;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
@@ -63,11 +62,11 @@
*/
- public Map<String, String> definitionMap = new HashMap<>();
+ public Map<String, String> definitionMap = new HashMap<String, String>();
/**
* System properties
*/
- public Map<String, String> syspropsMap = new HashMap<>();
+ public Map<String, String> syspropsMap = new HashMap<String, String>();
/**
@@ -215,12 +214,12 @@
/**
- * If the Filesystem URL was provided, it overrides anything in
+ * If the Filesystem binding was provided, it overrides anything in
* the configuration
* @param conf configuration
*/
- public void applyFileSystemURL(Configuration conf) {
- ArgOps.applyFileSystemURL(getFilesystemURL(), conf);
+ public void applyFileSystemBinding(Configuration conf) {
+ ArgOps.applyFileSystemBinding(getFilesystemBinding(), conf);
}
public boolean isDebug() {
@@ -228,8 +227,8 @@
}
- public URI getFilesystemURL() {
- return coreAction.filesystemURL;
+ public String getFilesystemBinding() {
+ return coreAction.filesystemBinding;
}
public Path getBasePath() { return coreAction.basePath; }
diff --git a/slider-core/src/main/java/org/apache/slider/common/params/DontSplitArguments.java b/slider-core/src/main/java/org/apache/slider/common/params/DontSplitArguments.java
index 0344305..3225133 100644
--- a/slider-core/src/main/java/org/apache/slider/common/params/DontSplitArguments.java
+++ b/slider-core/src/main/java/org/apache/slider/common/params/DontSplitArguments.java
@@ -27,7 +27,7 @@
@Override
public List<String> split(String value) {
- List<String> list = new ArrayList<>(1);
+ List<String> list = new ArrayList<String>(1);
list.add(value);
return list;
}
diff --git a/slider-core/src/main/java/org/apache/slider/common/params/SliderActions.java b/slider-core/src/main/java/org/apache/slider/common/params/SliderActions.java
index 2219a25..8e50a83 100644
--- a/slider-core/src/main/java/org/apache/slider/common/params/SliderActions.java
+++ b/slider-core/src/main/java/org/apache/slider/common/params/SliderActions.java
@@ -27,6 +27,7 @@
String ACTION_AM_SUICIDE = "am-suicide";
String ACTION_BUILD = "build";
String ACTION_CREATE = "create";
+ String ACTION_UPDATE = "update";
String ACTION_DESTROY = "destroy";
String ACTION_ECHO = "echo";
String ACTION_EXISTS = "exists";
@@ -49,6 +50,8 @@
"Build a Slider cluster specification -but do not start it";
String DESCRIBE_ACTION_CREATE =
"Create a live Slider application";
+ String DESCRIBE_ACTION_UPDATE =
+ "Update template for a Slider application";
String DESCRIBE_ACTION_DESTROY =
"Destroy a frozen Slider application)";
String DESCRIBE_ACTION_EXISTS =
diff --git a/slider-core/src/main/java/org/apache/slider/common/tools/Comparators.java b/slider-core/src/main/java/org/apache/slider/common/tools/Comparators.java
new file mode 100644
index 0000000..0ccca0f
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/common/tools/Comparators.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.common.tools;
+
+import java.io.Serializable;
+import java.util.Comparator;
+
+public class Comparators {
+
+ public static class LongComparator implements Comparator<Long>, Serializable {
+ @Override
+ public int compare(Long o1, Long o2) {
+ long result = o1 - o2;
+ // need to handle comparisons with a diff greater than integer size
+ if (result < 0 ) {
+ return -1;
+ } else if (result >0) {
+ return 1;
+ }
+ return 0;
+ }
+ }
+
+ /**
+ * Little template class to reverse any comparator
+ * @param <CompareType> the type that is being compared
+ */
+ public static class ComparatorReverser<CompareType> implements Comparator<CompareType>,
+ Serializable {
+
+ final Comparator<CompareType> instance;
+
+ public ComparatorReverser(Comparator<CompareType> instance) {
+ this.instance = instance;
+ }
+
+ @Override
+ public int compare(CompareType first, CompareType second) {
+ return instance.compare(second, first);
+ }
+ }
+}
diff --git a/slider-core/src/main/java/org/apache/slider/common/tools/ConfigHelper.java b/slider-core/src/main/java/org/apache/slider/common/tools/ConfigHelper.java
index 2f0b9ac..b7e1323 100644
--- a/slider-core/src/main/java/org/apache/slider/common/tools/ConfigHelper.java
+++ b/slider-core/src/main/java/org/apache/slider/common/tools/ConfigHelper.java
@@ -81,7 +81,7 @@
*/
public static TreeSet<String> sortedConfigKeys(Iterable<Map.Entry<String, String>> conf) {
- TreeSet<String> sorted = new TreeSet<>();
+ TreeSet<String> sorted = new TreeSet<String>();
for (Map.Entry<String, String> entry : conf) {
sorted.add(entry.getKey());
}
@@ -182,24 +182,53 @@
public Document parseConfiguration(FileSystem fs,
Path path) throws
IOException {
- int len = (int) fs.getLength(path);
- byte[] data = new byte[len];
- try(FSDataInputStream in = fs.open(path)) {
- in.readFully(0, data);
- }
+
+ byte[] data = loadBytes(fs, path);
//this is here to track down a parse issue
//related to configurations
- String s = new String(data, 0, len);
+ String s = new String(data, 0, data.length);
log.debug("XML resource {} is \"{}\"", path, s);
+/* JDK7
try (ByteArrayInputStream in = new ByteArrayInputStream(data)) {
Document document = parseConfigXML(in);
return document;
} catch (ParserConfigurationException | SAXException e) {
throw new IOException(e);
}
+*/
+ ByteArrayInputStream in= null;
+ try {
+ in = new ByteArrayInputStream(data);
+ Document document = parseConfigXML(in);
+ return document;
+ } catch (ParserConfigurationException e) {
+ throw new IOException(e);
+ } catch (SAXException e) {
+ throw new IOException(e);
+ } finally {
+ IOUtils.closeStream(in);
+ }
}
-
+
+ public static byte[] loadBytes(FileSystem fs, Path path) throws IOException {
+ int len = (int) fs.getLength(path);
+ byte[] data = new byte[len];
+ /* JDK7
+ try(FSDataInputStream in = fs.open(path)) {
+ in.readFully(0, data);
+ }
+*/
+ FSDataInputStream in = null;
+ in = fs.open(path);
+ try {
+ in.readFully(0, data);
+ } finally {
+ IOUtils.closeStream(in);
+ }
+ return data;
+ }
+
/**
* Load a configuration from ANY FS path. The normal Configuration
* loader only works with file:// URIs
@@ -209,13 +238,9 @@
* @throws IOException
*/
public static Configuration loadConfiguration(FileSystem fs,
- Path path) throws
- IOException {
- int len = (int) fs.getLength(path);
- byte[] data = new byte[len];
- try (FSDataInputStream in = fs.open(path)) {
- in.readFully(0, data);
- }
+ Path path) throws IOException {
+ byte[] data = loadBytes(fs, path);
+
ByteArrayInputStream in2;
in2 = new ByteArrayInputStream(data);
@@ -510,7 +535,7 @@
* @return hash map
*/
public static Map<String, String> buildMapFromConfiguration(Configuration conf) {
- Map<String, String> map = new HashMap<>();
+ Map<String, String> map = new HashMap<String, String>();
return SliderUtils.mergeEntries(map, conf);
}
@@ -523,7 +548,8 @@
* @param valuesource the source of values
* @return a new configuration where <code>foreach key in keysource, get(key)==valuesource.get(key)</code>
*/
- public static Configuration resolveConfiguration(Iterable<Map.Entry<String, String>> keysource,
+ public static Configuration resolveConfiguration(
+ Iterable<Map.Entry<String, String>> keysource,
Configuration valuesource) {
Configuration result = new Configuration(false);
for (Map.Entry<String, String> entry : keysource) {
diff --git a/slider-core/src/main/java/org/apache/slider/common/tools/CoreFileSystem.java b/slider-core/src/main/java/org/apache/slider/common/tools/CoreFileSystem.java
index 714322c..def252a 100644
--- a/slider-core/src/main/java/org/apache/slider/common/tools/CoreFileSystem.java
+++ b/slider-core/src/main/java/org/apache/slider/common/tools/CoreFileSystem.java
@@ -50,6 +50,9 @@
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.Map;
+import java.util.Enumeration;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipFile;
import static org.apache.slider.common.SliderXmlConfKeys.CLUSTER_DIRECTORY_PERMISSIONS;
import static org.apache.slider.common.SliderXmlConfKeys.DEFAULT_CLUSTER_DIRECTORY_PERMISSIONS;
@@ -242,6 +245,7 @@
IOException,
SliderException {
if (fileSystem.exists(clusterDirectory)) {
+
log.error("Dir {} exists: {}",
clusterDirectory,
listFSDir(clusterDirectory));
@@ -303,6 +307,37 @@
}
/**
+ * Verify that a file exists in the zip file given by path
+ * @param path path to zip file
+ * @param file file expected to be in zip
+ * @throws FileNotFoundException file not found or is not a zip file
+ * @throws IOException trouble with FS
+ */
+ public void verifyFileExistsInZip(Path path, String file) throws IOException {
+ fileSystem.copyToLocalFile(path, new Path("/tmp"));
+ File dst = new File((new Path("/tmp", path.getName())).toString());
+ Enumeration<? extends ZipEntry> entries;
+ ZipFile zipFile = new ZipFile(dst);
+ boolean found = false;
+
+ try {
+ entries = zipFile.entries();
+ while (entries.hasMoreElements()) {
+ ZipEntry entry = entries.nextElement();
+ String nm = entry.getName();
+ if (nm.endsWith(file)) {
+ found = true;
+ break;
+ }
+ }
+ } finally {
+ zipFile.close();
+ }
+ dst.delete();
+ if (!found) throw new FileNotFoundException("file: " + file + " not found in " + path);
+ log.info("Verification of " + path + " passed");
+ }
+ /**
* Create the application-instance specific temporary directory
* in the DFS
*
@@ -407,7 +442,7 @@
//copied to the destination
FileStatus[] fileset = fileSystem.listStatus(srcDir);
Map<String, LocalResource> localResources =
- new HashMap<>(fileset.length);
+ new HashMap<String, LocalResource>(fileset.length);
for (FileStatus entry : fileset) {
LocalResource resource = createAmResource(entry.getPath(),
diff --git a/slider-core/src/main/java/org/apache/slider/common/tools/SliderUtils.java b/slider-core/src/main/java/org/apache/slider/common/tools/SliderUtils.java
index 17f8b70..188b7d9 100644
--- a/slider-core/src/main/java/org/apache/slider/common/tools/SliderUtils.java
+++ b/slider-core/src/main/java/org/apache/slider/common/tools/SliderUtils.java
@@ -18,6 +18,7 @@
package org.apache.slider.common.tools;
+import com.google.common.base.Preconditions;
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
import org.apache.commons.io.output.ByteArrayOutputStream;
@@ -42,7 +43,7 @@
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
-import org.apache.slider.api.OptionKeys;
+import org.apache.slider.api.InternalKeys;
import org.apache.slider.api.RoleKeys;
import org.apache.slider.common.SliderKeys;
import org.apache.slider.common.SliderXmlConfKeys;
@@ -439,7 +440,7 @@
* @return a stringified list
*/
public static List<String> collectionToStringList(Collection c) {
- List<String> l = new ArrayList<>(c.size());
+ List<String> l = new ArrayList<String>(c.size());
for (Object o : c) {
l.add(o.toString());
}
@@ -467,13 +468,19 @@
*/
public static String join(Collection collection, String separator, boolean trailing) {
StringBuilder b = new StringBuilder();
+ // fast return on empty collection
+ if (collection.isEmpty()) {
+ return trailing ? separator : "";
+ }
for (Object o : collection) {
b.append(o);
b.append(separator);
}
- return trailing?
- b.toString()
- : (b.substring(0, b.length() - separator.length()));
+ int length = separator.length();
+ String s = b.toString();
+ return (trailing || s.isEmpty())?
+ s
+ : (b.substring(0, b.length() - length));
}
/**
@@ -610,6 +617,8 @@
*/
public static <T1, T2> Map<T1, T2> mergeMapsIgnoreDuplicateKeys(Map<T1, T2> first,
Map<T1, T2> second) {
+ Preconditions.checkArgument(first != null, "Null 'first' value");
+ Preconditions.checkArgument(second != null, "Null 'second' value");
for (Map.Entry<T1, T2> entry : second.entrySet()) {
T1 key = entry.getKey();
if (!first.containsKey(key)) {
@@ -830,7 +839,7 @@
* @return a possibly empty map of environment variables.
*/
public static Map<String, String> buildEnvMap(Map<String, String> roleOpts) {
- Map<String, String> env = new HashMap<>();
+ Map<String, String> env = new HashMap<String, String>();
if (roleOpts != null) {
for (Map.Entry<String, String> entry: roleOpts.entrySet()) {
String key = entry.getKey();
@@ -857,7 +866,7 @@
Map<String, String> optionMap = entry.getValue();
Map<String, String> existingMap = clusterRoleMap.get(key);
if (existingMap == null) {
- existingMap = new HashMap<>();
+ existingMap = new HashMap<String, String>();
}
log.debug("Overwriting role options with command line values {}",
stringifyMap(optionMap));
@@ -1022,7 +1031,7 @@
}
public static Map<String, Map<String, String>> deepClone(Map<String, Map<String, String>> src) {
- Map<String, Map<String, String>> dest = new HashMap<>();
+ Map<String, Map<String, String>> dest = new HashMap<String, Map<String, String>>();
for (Map.Entry<String, Map<String, String>> entry : src.entrySet()) {
dest.put(entry.getKey(), stringMapClone(entry.getValue()));
}
@@ -1030,7 +1039,7 @@
}
public static Map<String, String> stringMapClone(Map<String, String> src) {
- Map<String, String> dest = new HashMap<>();
+ Map<String, String> dest = new HashMap<String, String>();
return mergeEntries(dest, src.entrySet());
}
@@ -1073,7 +1082,7 @@
UserGroupInformation currentUser = UserGroupInformation.getCurrentUser();
return currentUser;
} catch (IOException e) {
- log.info("Failed to grt user info", e);
+ log.info("Failed to get user info", e);
throw e;
}
}
@@ -1294,8 +1303,8 @@
SliderException, IOException {
Path imagePath;
String imagePathOption =
- internalOptions.get(OptionKeys.INTERNAL_APPLICATION_IMAGE_PATH);
- String appHomeOption = internalOptions.get(OptionKeys.INTERNAL_APPLICATION_HOME);
+ internalOptions.get(InternalKeys.INTERNAL_APPLICATION_IMAGE_PATH);
+ String appHomeOption = internalOptions.get(InternalKeys.INTERNAL_APPLICATION_HOME);
if (!isUnset(imagePathOption)) {
imagePath = fs.createPathThatMustExist(imagePathOption);
} else {
@@ -1357,9 +1366,9 @@
/**
* Append a list of paths, inserting "/" signs as appropriate
- * @param base
- * @param paths
- * @return
+ * @param base base path/URL
+ * @param paths subpaths
+ * @return base+"/"+paths[0]+"/"+paths[1]...
*/
public static String appendToURL(String base, String...paths) {
String result = base;
@@ -1415,6 +1424,21 @@
}
/**
+ * A compareTo function that converts the result of a long
+ * comparison into the integer that <code>Comparable</code>
+ * expects.
+ * @param left left side
+ * @param right right side
+ * @return -1, 0, 1 depending on the diff
+ */
+ public static int compareTo(long left, long right) {
+ long diff = left - right;
+ if (diff < 0) return -1;
+ if (diff > 0) return 1;
+ return 0;
+ }
+
+ /**
* This wrapps ApplicationReports and generates a string version
* iff the toString() operator is invoked
*/
@@ -1452,7 +1476,7 @@
}
is = new ByteArrayInputStream(content);
} else {
- log.info("Size unknown. Reading {}", zipEntry.getName());
+ log.debug("Size unknown. Reading {}", zipEntry.getName());
ByteArrayOutputStream baos = new ByteArrayOutputStream();
while (true) {
int byteRead = zis.read();
diff --git a/slider-core/src/main/java/org/apache/slider/core/build/InstanceBuilder.java b/slider-core/src/main/java/org/apache/slider/core/build/InstanceBuilder.java
index 0580013..937b777 100644
--- a/slider-core/src/main/java/org/apache/slider/core/build/InstanceBuilder.java
+++ b/slider-core/src/main/java/org/apache/slider/core/build/InstanceBuilder.java
@@ -24,6 +24,7 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.slider.api.InternalKeys;
import org.apache.slider.api.OptionKeys;
import org.apache.slider.api.StatusKeys;
import org.apache.slider.common.SliderXmlConfKeys;
@@ -139,7 +140,7 @@
instancePaths.dataPath.toUri());
- internalOps.set(OptionKeys.INTERNAL_PROVIDER_NAME, provider);
+ internalOps.set(InternalKeys.INTERNAL_PROVIDER_NAME, provider);
internalOps.set(OptionKeys.APPLICATION_NAME, clustername);
}
@@ -224,16 +225,18 @@
/**
* Persist this
+ * @param appconfdir conf dir
+ * @param overwrite if true, we don't need to create cluster dir
* @throws IOException
* @throws SliderException
* @throws LockAcquireFailedException
* @param appconfdir dir to persist the conf to
*/
- public void persist(Path appconfdir) throws
+ public void persist(Path appconfdir, boolean overwrite) throws
IOException,
SliderException,
LockAcquireFailedException {
- coreFS.createClusterDirectories(instancePaths);
+ if (!overwrite) coreFS.createClusterDirectories(instancePaths);
ConfPersister persister =
new ConfPersister(coreFS, getInstanceDir());
ConfDirSnapshotAction action = null;
diff --git a/slider-core/src/main/java/org/apache/slider/core/conf/ConfTree.java b/slider-core/src/main/java/org/apache/slider/core/conf/ConfTree.java
index 2cbfd54..5517771 100644
--- a/slider-core/src/main/java/org/apache/slider/core/conf/ConfTree.java
+++ b/slider-core/src/main/java/org/apache/slider/core/conf/ConfTree.java
@@ -55,14 +55,14 @@
/**
* Metadata
*/
- public Map<String, Object> metadata = new HashMap<>(INITAL_MAP_CAPACITY);
+ public Map<String, Object> metadata = new HashMap<String, Object >(INITAL_MAP_CAPACITY);
/**
* Global options
*/
public Map<String, String> global =
- new HashMap<>(INITAL_MAP_CAPACITY);
+ new HashMap<String, String>(INITAL_MAP_CAPACITY);
/**
@@ -70,7 +70,7 @@
* role -> option -> value
*/
public Map<String, Map<String, String>> components =
- new HashMap<>(INITAL_MAP_CAPACITY);
+ new HashMap<String, Map<String, String>>(INITAL_MAP_CAPACITY);
/**
diff --git a/slider-core/src/main/java/org/apache/slider/core/conf/ConfTreeOperations.java b/slider-core/src/main/java/org/apache/slider/core/conf/ConfTreeOperations.java
index 1cb537a..bb17547 100644
--- a/slider-core/src/main/java/org/apache/slider/core/conf/ConfTreeOperations.java
+++ b/slider-core/src/main/java/org/apache/slider/core/conf/ConfTreeOperations.java
@@ -148,7 +148,7 @@
return operations;
}
//create a new instances
- Map<String, String> map = new HashMap<>();
+ Map<String, String> map = new HashMap<String, String>();
confTree.components.put(name, map);
return new MapOperations(name, map);
}
@@ -159,7 +159,7 @@
*/
@JsonIgnore
public Set<String> getComponentNames() {
- return new HashSet<>(confTree.components.keySet());
+ return new HashSet<String>(confTree.components.keySet());
}
diff --git a/slider-core/src/main/java/org/apache/slider/core/conf/MapOperations.java b/slider-core/src/main/java/org/apache/slider/core/conf/MapOperations.java
index bb57b94..4b1b44f 100644
--- a/slider-core/src/main/java/org/apache/slider/core/conf/MapOperations.java
+++ b/slider-core/src/main/java/org/apache/slider/core/conf/MapOperations.java
@@ -18,6 +18,7 @@
package org.apache.slider.core.conf;
+import com.google.common.base.Preconditions;
import org.apache.slider.common.tools.SliderUtils;
import org.apache.slider.core.exceptions.BadConfigException;
import org.slf4j.Logger;
@@ -46,7 +47,7 @@
public final String name;
public MapOperations() {
- options = new HashMap<>();
+ options = new HashMap<String, String>();
name = "";
}
@@ -61,12 +62,11 @@
this.name = name;
}
-
/**
- * Get a cluster option or value
+ * Get an option value
*
- * @param key
- * @param defVal
+ * @param key key
+ * @param defVal default value
* @return option in map or the default
*/
public String getOption(String key, String defVal) {
@@ -74,14 +74,27 @@
return val != null ? val : defVal;
}
+ /**
+ * Get a boolean option
+ *
+ * @param key option key
+ * @param defVal default value
+ * @return option true if the option equals "true", or the default value
+ * if the option was not defined at all.
+ */
+ public Boolean getOptionBool(String key, boolean defVal) {
+ String val = getOption(key, Boolean.toString(defVal));
+ return Boolean.valueOf(val);
+ }
/**
* Get a cluster option or value
*
- * @param key
+ * @param key option key
* @return the value
* @throws BadConfigException if the option is missing
*/
+
public String getMandatoryOption(String key) throws BadConfigException {
String val = options.get(key);
if (val == null) {
@@ -247,4 +260,35 @@
}
return builder.toString();
}
+
+ /**
+ * Get the time range of a set of keys
+ * @param basekey
+ * @param defDays
+ * @param defHours
+ * @param defMins
+ * @param defSecs
+ * @return
+ */
+ public long getTimeRange(String basekey,
+ int defDays,
+ int defHours,
+ int defMins,
+ int defSecs) {
+ Preconditions.checkArgument(basekey != null);
+ int days = getOptionInt(basekey + ".days", defDays);
+ int hours = getOptionInt(basekey + ".hours", defHours);
+
+ int minutes = getOptionInt(basekey + ".minutes", defMins);
+ int seconds = getOptionInt(basekey + ".seconds", defSecs);
+ // range check
+ Preconditions.checkState(days >= 0 && hours >= 0 && minutes >= 0
+ && seconds >= 0,
+ "Time range for %s has negative time component %s:%s:%s:%s",
+ basekey, days, hours, minutes, seconds);
+
+ // calculate total time, schedule the reset if expected
+ long totalMinutes = days * 24 * 60 + hours * 24 + minutes;
+ return totalMinutes * 60 + seconds;
+ }
}
diff --git a/slider-core/src/main/java/org/apache/slider/core/exceptions/ErrorStrings.java b/slider-core/src/main/java/org/apache/slider/core/exceptions/ErrorStrings.java
index c949c1c..894f19b 100644
--- a/slider-core/src/main/java/org/apache/slider/core/exceptions/ErrorStrings.java
+++ b/slider-core/src/main/java/org/apache/slider/core/exceptions/ErrorStrings.java
@@ -20,7 +20,7 @@
public interface ErrorStrings {
String E_UNSTABLE_CLUSTER = "Unstable Application Instance :";
- String E_CLUSTER_RUNNING = "Application Instance lready running";
+ String E_CLUSTER_RUNNING = "Application Instance already running";
String E_ALREADY_EXISTS = "already exists";
String PRINTF_E_INSTANCE_ALREADY_EXISTS = "Application Instance \"%s\" already exists and is defined in %s";
String PRINTF_E_INSTANCE_DIR_ALREADY_EXISTS = "Application Instance dir already exists: %s";
diff --git a/slider-core/src/main/java/org/apache/slider/core/launch/AbstractLauncher.java b/slider-core/src/main/java/org/apache/slider/core/launch/AbstractLauncher.java
index d8c3522..644f627 100644
--- a/slider-core/src/main/java/org/apache/slider/core/launch/AbstractLauncher.java
+++ b/slider-core/src/main/java/org/apache/slider/core/launch/AbstractLauncher.java
@@ -18,6 +18,7 @@
package org.apache.slider.core.launch;
+import com.google.common.base.Preconditions;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
@@ -57,15 +58,15 @@
/**
* Env vars; set up at final launch stage
*/
- protected final Map<String, String> envVars = new HashMap<>();
+ protected final Map<String, String> envVars = new HashMap<String, String>();
protected final MapOperations env = new MapOperations("env", envVars);
protected final ContainerLaunchContext containerLaunchContext =
Records.newRecord(ContainerLaunchContext.class);
- protected final List<String> commands = new ArrayList<>(20);
+ protected final List<String> commands = new ArrayList<String>(20);
protected final Map<String, LocalResource> localResources =
- new HashMap<>();
+ new HashMap<String, LocalResource>();
private final Map<String, ByteBuffer> serviceData =
- new HashMap<>();
+ new HashMap<String, ByteBuffer>();
// security
Credentials credentials = new Credentials();
@@ -238,6 +239,8 @@
setEnv("CLASSPATH", classpath.buildClasspath());
}
public void setEnv(String var, String value) {
+ Preconditions.checkArgument(var != null, "null variable name");
+ Preconditions.checkArgument(value != null, "null value");
env.put(var, value);
}
@@ -266,7 +269,7 @@
public String[] dumpEnvToString() {
- List<String> nodeEnv = new ArrayList<>();
+ List<String> nodeEnv = new ArrayList<String>();
for (Map.Entry<String, String> entry : env.entrySet()) {
String envElt = String.format("%s=\"%s\"",
diff --git a/slider-core/src/main/java/org/apache/slider/core/launch/AppMasterLauncher.java b/slider-core/src/main/java/org/apache/slider/core/launch/AppMasterLauncher.java
index dc4a886..bd8a0a5 100644
--- a/slider-core/src/main/java/org/apache/slider/core/launch/AppMasterLauncher.java
+++ b/slider-core/src/main/java/org/apache/slider/core/launch/AppMasterLauncher.java
@@ -50,7 +50,7 @@
private final ApplicationSubmissionContext submissionContext;
private final ApplicationId appId;
private final boolean secureCluster;
- private int maxAppAttempts = 2;
+ private int maxAppAttempts = 0;
private boolean keepContainersOverRestarts = true;
private String queue = YarnConfiguration.DEFAULT_QUEUE_NAME;
private int priority = 1;
@@ -174,7 +174,10 @@
submissionContext.setKeepContainersAcrossApplicationAttempts(true);
}
- submissionContext.setMaxAppAttempts(maxAppAttempts);
+ if (maxAppAttempts > 0) {
+ log.debug("Setting max AM attempts to {}", maxAppAttempts);
+ submissionContext.setMaxAppAttempts(maxAppAttempts);
+ }
if (secureCluster) {
addSecurityTokens();
diff --git a/slider-core/src/main/java/org/apache/slider/core/launch/ClasspathConstructor.java b/slider-core/src/main/java/org/apache/slider/core/launch/ClasspathConstructor.java
index 3527149..8e49435 100644
--- a/slider-core/src/main/java/org/apache/slider/core/launch/ClasspathConstructor.java
+++ b/slider-core/src/main/java/org/apache/slider/core/launch/ClasspathConstructor.java
@@ -39,7 +39,7 @@
public static final String CLASS_PATH_SEPARATOR = ApplicationConstants.CLASS_PATH_SEPARATOR;
// public static final String CLASS_PATH_SEPARATOR = File.pathSeparator;
- private final List<String> pathElements = new ArrayList<>();
+ private final List<String> pathElements = new ArrayList<String>();
public ClasspathConstructor() {
}
diff --git a/slider-core/src/main/java/org/apache/slider/core/launch/CommandLineBuilder.java b/slider-core/src/main/java/org/apache/slider/core/launch/CommandLineBuilder.java
index dbaa981..f50bb48 100644
--- a/slider-core/src/main/java/org/apache/slider/core/launch/CommandLineBuilder.java
+++ b/slider-core/src/main/java/org/apache/slider/core/launch/CommandLineBuilder.java
@@ -30,7 +30,7 @@
* Special support for JVM command buildup.
*/
public class CommandLineBuilder {
- protected final List<String> argumentList = new ArrayList<>(20);
+ protected final List<String> argumentList = new ArrayList<String>(20);
/**
diff --git a/slider-core/src/main/java/org/apache/slider/core/launch/JavaCommandLineBuilder.java b/slider-core/src/main/java/org/apache/slider/core/launch/JavaCommandLineBuilder.java
index 7b60461..0367e06 100644
--- a/slider-core/src/main/java/org/apache/slider/core/launch/JavaCommandLineBuilder.java
+++ b/slider-core/src/main/java/org/apache/slider/core/launch/JavaCommandLineBuilder.java
@@ -19,6 +19,7 @@
package org.apache.slider.core.launch;
+import com.google.common.base.Preconditions;
import org.apache.hadoop.yarn.api.ApplicationConstants;
import org.apache.slider.common.tools.SliderUtils;
@@ -63,6 +64,8 @@
* @param value
*/
public void sysprop(String property, String value) {
+ Preconditions.checkArgument(property != null, "null property name");
+ Preconditions.checkArgument(value != null, "null value");
add("-D" + property + "=" + value);
}
diff --git a/slider-core/src/main/java/org/apache/slider/core/main/ServiceLauncher.java b/slider-core/src/main/java/org/apache/slider/core/main/ServiceLauncher.java
index c92dfda..df12849 100644
--- a/slider-core/src/main/java/org/apache/slider/core/main/ServiceLauncher.java
+++ b/slider-core/src/main/java/org/apache/slider/core/main/ServiceLauncher.java
@@ -202,15 +202,11 @@
* to the configuration, and <code>service</code> to the service.
*
* @param conf configuration to use
- * @throws ClassNotFoundException no such class
- * @throws InstantiationException no empty constructor,
- * problems with dependencies
* @throws ClassNotFoundException classname not on the classpath
* @throws IllegalAccessException not allowed at the class
* @throws InstantiationException not allowed to instantiate it
- * @throws InterruptedException thread interrupted
- * @throws Throwable any other failure
*/
+ @SuppressWarnings("unchecked")
public Service instantiateService(Configuration conf)
throws ClassNotFoundException, InstantiationException, IllegalAccessException,
ExitUtil.ExitException, NoSuchMethodException, InvocationTargetException {
@@ -423,7 +419,7 @@
if (argCount <= 1 ) {
return new String[0];
}
- List<String> argsList = new ArrayList<>(argCount);
+ List<String> argsList = new ArrayList<String>(argCount);
ListIterator<String> arguments = args.listIterator();
//skip that first entry
arguments.next();
@@ -447,6 +443,7 @@
try {
conf.addResource(file.toURI().toURL());
} catch (MalformedURLException e) {
+ LOG.debug("File {} cannot be converted to URL: {}", e);
exitWithMessage(EXIT_COMMAND_ARGUMENT_ERROR,
ARG_CONF + ": configuration file path invalid: " + file);
}
diff --git a/slider-core/src/main/java/org/apache/slider/core/persist/JsonSerDeser.java b/slider-core/src/main/java/org/apache/slider/core/persist/JsonSerDeser.java
index 3512168..ab71683 100644
--- a/slider-core/src/main/java/org/apache/slider/core/persist/JsonSerDeser.java
+++ b/slider-core/src/main/java/org/apache/slider/core/persist/JsonSerDeser.java
@@ -105,7 +105,8 @@
* @throws IOException IO problems
* @throws JsonMappingException failure to map from the JSON to this class
*/
- public T fromResource(String resource)
+/* JDK7
+ public T fromResource(String resource)
throws IOException, JsonParseException, JsonMappingException {
try(InputStream resStream = this.getClass().getResourceAsStream(resource)) {
if (resStream == null) {
@@ -116,6 +117,30 @@
log.error("Exception while parsing json resource {}: {}", resource, e);
throw e;
}
+ }*/
+
+ /**
+ * Convert from a JSON file
+ * @param resource input file
+ * @return the parsed JSON
+ * @throws IOException IO problems
+ * @throws JsonMappingException failure to map from the JSON to this class
+ */
+ public synchronized T fromResource(String resource)
+ throws IOException, JsonParseException, JsonMappingException {
+ InputStream resStream = null;
+ try {
+ resStream = this.getClass().getResourceAsStream(resource);
+ if (resStream == null) {
+ throw new FileNotFoundException(resource);
+ }
+ return (T) (mapper.readValue(resStream, classType));
+ } catch (IOException e) {
+ log.error("Exception while parsing json resource {}: {}", resource, e);
+ throw e;
+ } finally {
+ IOUtils.closeStream(resStream);
+ }
}
/**
diff --git a/slider-core/src/main/java/org/apache/slider/core/registry/docstore/PublishedConfigSet.java b/slider-core/src/main/java/org/apache/slider/core/registry/docstore/PublishedConfigSet.java
index f498916..eac34c0 100644
--- a/slider-core/src/main/java/org/apache/slider/core/registry/docstore/PublishedConfigSet.java
+++ b/slider-core/src/main/java/org/apache/slider/core/registry/docstore/PublishedConfigSet.java
@@ -41,7 +41,7 @@
RestPaths.PUBLISHED_CONFIGURATION_REGEXP);
public Map<String, PublishedConfiguration> configurations =
- new HashMap<>();
+ new HashMap<String, PublishedConfiguration>();
public PublishedConfigSet() {
}
diff --git a/slider-core/src/main/java/org/apache/slider/core/registry/docstore/PublishedConfiguration.java b/slider-core/src/main/java/org/apache/slider/core/registry/docstore/PublishedConfiguration.java
index 93282cc..f76b93b 100644
--- a/slider-core/src/main/java/org/apache/slider/core/registry/docstore/PublishedConfiguration.java
+++ b/slider-core/src/main/java/org/apache/slider/core/registry/docstore/PublishedConfiguration.java
@@ -46,7 +46,7 @@
public String updatedTime;
- public Map<String, String> entries = new HashMap<>();
+ public Map<String, String> entries = new HashMap<String, String>();
public PublishedConfiguration() {
}
@@ -112,7 +112,7 @@
* @param entries entries to put
*/
public void putValues(Iterable<Map.Entry<String, String>> entries) {
- this.entries = new HashMap<>();
+ this.entries = new HashMap<String, String>();
for (Map.Entry<String, String> entry : entries) {
this.entries.put(entry.getKey(), entry.getValue());
}
diff --git a/slider-core/src/main/java/org/apache/slider/core/registry/docstore/PublishedConfigurationOutputter.java b/slider-core/src/main/java/org/apache/slider/core/registry/docstore/PublishedConfigurationOutputter.java
index 929b8ef..bf812dd 100644
--- a/slider-core/src/main/java/org/apache/slider/core/registry/docstore/PublishedConfigurationOutputter.java
+++ b/slider-core/src/main/java/org/apache/slider/core/registry/docstore/PublishedConfigurationOutputter.java
@@ -48,12 +48,24 @@
* @param dest destination file
* @throws IOException
*/
+/* JDK7
public void save(File dest) throws IOException {
try(FileOutputStream out = new FileOutputStream(dest)) {
save(out);
out.close();
}
}
+*/
+ public void save(File dest) throws IOException {
+ FileOutputStream out = null;
+ try {
+ out = new FileOutputStream(dest);
+ save(out);
+ out.close();
+ } finally {
+ org.apache.hadoop.io.IOUtils.closeStream(out);
+ }
+ }
/**
* Save the content. The default saves the asString() value
diff --git a/slider-core/src/main/java/org/apache/slider/core/registry/docstore/UriMap.java b/slider-core/src/main/java/org/apache/slider/core/registry/docstore/UriMap.java
index 120966f..a76e28d 100644
--- a/slider-core/src/main/java/org/apache/slider/core/registry/docstore/UriMap.java
+++ b/slider-core/src/main/java/org/apache/slider/core/registry/docstore/UriMap.java
@@ -29,7 +29,7 @@
@JsonSerialize(include = JsonSerialize.Inclusion.NON_NULL)
public class UriMap {
- public Map<String, String> uris = new HashMap<>();
+ public Map<String, String> uris = new HashMap<String, String>();
@JsonIgnore
public void put(String key, String value) {
diff --git a/slider-core/src/main/java/org/apache/slider/core/registry/info/RegistryView.java b/slider-core/src/main/java/org/apache/slider/core/registry/info/RegistryView.java
index 07e4981..bdf70a2 100644
--- a/slider-core/src/main/java/org/apache/slider/core/registry/info/RegistryView.java
+++ b/slider-core/src/main/java/org/apache/slider/core/registry/info/RegistryView.java
@@ -31,7 +31,8 @@
/**
* Endpoints
*/
- public Map<String, RegisteredEndpoint> endpoints = new HashMap<>(2);
+ public Map<String, RegisteredEndpoint> endpoints =
+ new HashMap<String, RegisteredEndpoint>(2);
public String configurationsURL;
diff --git a/slider-core/src/main/java/org/apache/slider/core/registry/info/ServiceInstanceData.java b/slider-core/src/main/java/org/apache/slider/core/registry/info/ServiceInstanceData.java
index 80f0b34..c3c7e63 100644
--- a/slider-core/src/main/java/org/apache/slider/core/registry/info/ServiceInstanceData.java
+++ b/slider-core/src/main/java/org/apache/slider/core/registry/info/ServiceInstanceData.java
@@ -124,13 +124,13 @@
public Map<String, RegisteredEndpoint> listEndpoints(boolean external) {
RegistryView view = getRegistryView(external);
if (view == null) {
- return new HashMap<>(0);
+ return new HashMap<String, RegisteredEndpoint>(0);
}
Map<String, RegisteredEndpoint> endpoints = view.endpoints;
if (endpoints != null) {
return endpoints;
} else {
- return new HashMap<>(0);
+ return new HashMap<String, RegisteredEndpoint>(0);
}
}
diff --git a/slider-core/src/main/java/org/apache/slider/core/zk/ZKIntegration.java b/slider-core/src/main/java/org/apache/slider/core/zk/ZKIntegration.java
index 54aeb4f..0d96559 100644
--- a/slider-core/src/main/java/org/apache/slider/core/zk/ZKIntegration.java
+++ b/slider-core/src/main/java/org/apache/slider/core/zk/ZKIntegration.java
@@ -49,7 +49,7 @@
public static String SVC_SLIDER = "/" + ZK_SERVICES + "/" + ZK_SLIDER;
public static String SVC_SLIDER_USERS = SVC_SLIDER + "/" + ZK_USERS;
- public static final List<String> ZK_USERS_PATH_LIST = new ArrayList<>();
+ public static final List<String> ZK_USERS_PATH_LIST = new ArrayList<String>();
static {
ZK_USERS_PATH_LIST.add(ZK_SERVICES);
ZK_USERS_PATH_LIST.add(ZK_SLIDER);
diff --git a/slider-core/src/main/java/org/apache/slider/core/zk/ZookeeperUtils.java b/slider-core/src/main/java/org/apache/slider/core/zk/ZookeeperUtils.java
index 8bf25f9..61b1ff0 100644
--- a/slider-core/src/main/java/org/apache/slider/core/zk/ZookeeperUtils.java
+++ b/slider-core/src/main/java/org/apache/slider/core/zk/ZookeeperUtils.java
@@ -49,7 +49,7 @@
if (strings != null) {
len = strings.length;
}
- List<String> tuples = new ArrayList<>(len);
+ List<String> tuples = new ArrayList<String>(len);
if (strings != null) {
for (String s : strings) {
tuples.add(s.trim());
@@ -70,7 +70,7 @@
if (strings != null) {
len = strings.length;
}
- List<HostAndPort> list = new ArrayList<>(len);
+ List<HostAndPort> list = new ArrayList<HostAndPort>(len);
if (strings != null) {
for (String s : strings) {
list.add(HostAndPort.fromString(s.trim()));
@@ -113,7 +113,7 @@
* @return
*/
public static String buildQuorum(List<HostAndPort> hostAndPorts, int defaultPort) {
- List<String> entries = new ArrayList<>(hostAndPorts.size());
+ List<String> entries = new ArrayList<String>(hostAndPorts.size());
for (HostAndPort hostAndPort : hostAndPorts) {
entries.add(buildQuorumEntry(hostAndPort, defaultPort));
}
diff --git a/slider-core/src/main/java/org/apache/slider/providers/AbstractProviderService.java b/slider-core/src/main/java/org/apache/slider/providers/AbstractProviderService.java
index e35227c..3fbd3cf 100644
--- a/slider-core/src/main/java/org/apache/slider/providers/AbstractProviderService.java
+++ b/slider-core/src/main/java/org/apache/slider/providers/AbstractProviderService.java
@@ -20,9 +20,13 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.service.Service;
+import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.client.api.AMRMClient;
import org.apache.slider.api.ClusterDescription;
import org.apache.slider.common.SliderKeys;
import org.apache.slider.common.tools.ConfigHelper;
+import org.apache.slider.common.tools.SliderFileSystem;
import org.apache.slider.common.tools.SliderUtils;
import org.apache.slider.core.conf.AggregateConf;
import org.apache.slider.core.exceptions.BadCommandArgumentsException;
@@ -30,7 +34,9 @@
import org.apache.slider.core.main.ExitCodeProvider;
import org.apache.slider.core.registry.info.RegisteredEndpoint;
import org.apache.slider.core.registry.info.ServiceInstanceData;
-import org.apache.slider.server.appmaster.AMViewForProviders;
+import org.apache.slider.server.appmaster.actions.QueueAccess;
+import org.apache.slider.server.appmaster.state.ContainerReleaseSelector;
+import org.apache.slider.server.appmaster.state.MostRecentContainerReleaseSelector;
import org.apache.slider.server.appmaster.state.StateAccessForProviders;
import org.apache.slider.server.appmaster.web.rest.agent.AgentRestOperations;
import org.apache.slider.server.services.registry.RegistryViewForProviders;
@@ -42,7 +48,6 @@
import java.io.File;
import java.io.IOException;
-import java.net.MalformedURLException;
import java.net.URL;
import java.util.Collection;
import java.util.HashMap;
@@ -67,8 +72,8 @@
protected AgentRestOperations restOps;
protected RegistryViewForProviders registry;
protected ServiceInstanceData registryInstanceData;
- protected AMViewForProviders amView;
protected URL amWebAPI;
+ protected QueueAccess queueAccess;
public AbstractProviderService(String name) {
super(name);
@@ -83,8 +88,8 @@
return amState;
}
- public AMViewForProviders getAppMaster() {
- return amView;
+ public QueueAccess getQueueAccess() {
+ return queueAccess;
}
public void setAmState(StateAccessForProviders amState) {
@@ -93,10 +98,12 @@
@Override
public void bind(StateAccessForProviders stateAccessor,
- RegistryViewForProviders reg, AMViewForProviders amView) {
+ RegistryViewForProviders reg,
+ QueueAccess queueAccess,
+ List<Container> liveContainers) {
this.amState = stateAccessor;
this.registry = reg;
- this.amView = amView;
+ this.queueAccess = queueAccess;
}
@Override
@@ -104,6 +111,10 @@
return restOps;
}
+ @Override
+ public void notifyContainerCompleted(ContainerId containerId) {
+ }
+
public void setAgentRestOperations(AgentRestOperations agentRestOperations) {
this.restOps = agentRestOperations;
}
@@ -136,6 +147,15 @@
/**
* No-op implementation of this method.
+ */
+ @Override
+ public void initializeApplicationConfiguration(
+ AggregateConf instanceDefinition, SliderFileSystem fileSystem)
+ throws IOException, SliderException {
+ }
+
+ /**
+ * No-op implementation of this method.
*
* {@inheritDoc}
*/
@@ -271,7 +291,7 @@
*/
@Override
public Map<String, String> buildProviderStatus() {
- return new HashMap<>();
+ return new HashMap<String, String>();
}
/*
@@ -280,7 +300,7 @@
*/
@Override
public Map<String, String> buildMonitorDetails(ClusterDescription clusterDesc) {
- Map<String, String> details = new LinkedHashMap<>();
+ Map<String, String> details = new LinkedHashMap<String, String>();
// add in all the
buildEndpointDetails(details);
@@ -313,11 +333,39 @@
}
@Override
public void applyInitialRegistryDefinitions(URL unsecureWebAPI,
- URL secureWebAPI,
- ServiceInstanceData registryInstanceData) throws MalformedURLException,
- IOException {
+ URL secureWebAPI,
+ ServiceInstanceData registryInstanceData) throws IOException {
this.amWebAPI = unsecureWebAPI;
this.registryInstanceData = registryInstanceData;
}
+
+ /**
+ * {@inheritDoc}
+ *
+ *
+ * @return The base implementation returns the most recent containers first.
+ */
+ @Override
+ public ContainerReleaseSelector createContainerReleaseSelector() {
+ return new MostRecentContainerReleaseSelector();
+ }
+
+ @Override
+ public void releaseAssignedContainer(ContainerId containerId) {
+ // no-op
+ }
+
+ @Override
+ public void addContainerRequest(AMRMClient.ContainerRequest req) {
+ // no-op
+ }
+
+ /**
+ * No-op implementation of this method.
+ */
+ @Override
+ public void rebuildContainerDetails(List<Container> liveContainers,
+ String applicationId, Map<Integer, ProviderRole> providerRoles) {
+ }
}
diff --git a/slider-core/src/main/java/org/apache/slider/providers/ProviderService.java b/slider-core/src/main/java/org/apache/slider/providers/ProviderService.java
index 56e24e9..0f5b4fb 100644
--- a/slider-core/src/main/java/org/apache/slider/providers/ProviderService.java
+++ b/slider-core/src/main/java/org/apache/slider/providers/ProviderService.java
@@ -22,6 +22,7 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.service.Service;
import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.slider.api.ClusterDescription;
import org.apache.slider.common.tools.SliderFileSystem;
import org.apache.slider.core.conf.AggregateConf;
@@ -31,19 +32,23 @@
import org.apache.slider.core.launch.ContainerLauncher;
import org.apache.slider.core.main.ExitCodeProvider;
import org.apache.slider.core.registry.info.ServiceInstanceData;
-import org.apache.slider.server.appmaster.AMViewForProviders;
+import org.apache.slider.server.appmaster.actions.QueueAccess;
+import org.apache.slider.server.appmaster.state.ContainerReleaseSelector;
+import org.apache.slider.server.appmaster.operations.RMOperationHandlerActions;
import org.apache.slider.server.appmaster.state.StateAccessForProviders;
import org.apache.slider.server.appmaster.web.rest.agent.AgentRestOperations;
import org.apache.slider.server.services.registry.RegistryViewForProviders;
import java.io.File;
import java.io.IOException;
-import java.net.MalformedURLException;
import java.net.URL;
+import java.util.List;
import java.util.Map;
-public interface ProviderService extends ProviderCore, Service,
- ExitCodeProvider {
+public interface ProviderService extends ProviderCore,
+ Service,
+ RMOperationHandlerActions,
+ ExitCodeProvider {
/**
* Set up the entire container launch context
@@ -69,6 +74,12 @@
SliderException;
/**
+ * Notify the providers of container completion
+ * @param containerId container that has completed
+ */
+ void notifyContainerCompleted(ContainerId containerId);
+
+ /**
* Execute a process in the AM
* @param instanceDefinition cluster description
* @param confDir configuration directory
@@ -104,6 +115,17 @@
throws BadCommandArgumentsException, IOException;
/**
+ * The application configuration should be initialized here
+ *
+ * @param instanceDefinition
+ * @param fileSystem
+ * @throws IOException
+ * @throws SliderException
+ */
+ void initializeApplicationConfiguration(AggregateConf instanceDefinition,
+ SliderFileSystem fileSystem) throws IOException, SliderException;
+
+ /**
* This is a validation of the application configuration on the AM.
* Here is where things like the existence of keytabs and other
* not-seen-client-side properties can be tested, before
@@ -135,15 +157,10 @@
*/
Map<String, String> buildMonitorDetails(ClusterDescription clusterSpec);
- /**
- * bind operation -invoked before the service is started
- * @param stateAccessor interface offering read access to the state
- * @param registry
- * @param amView
- */
- void bind(StateAccessForProviders stateAccessor,
- RegistryViewForProviders registry,
- AMViewForProviders amView);
+ public void bind(StateAccessForProviders stateAccessor,
+ RegistryViewForProviders reg,
+ QueueAccess queueAccess,
+ List<Container> liveContainers);
/**
* Returns the agent rest operations interface.
@@ -165,6 +182,24 @@
*/
void applyInitialRegistryDefinitions(URL unsecureWebAPI,
URL secureWebAPI,
- ServiceInstanceData registryInstanceData) throws MalformedURLException,
- IOException;
+ ServiceInstanceData registryInstanceData)
+ throws IOException;
+
+ /**
+ * Create the container release selector for this provider...any policy
+ * can be implemented
+ * @return the selector to use for choosing containers.
+ */
+ ContainerReleaseSelector createContainerReleaseSelector();
+
+ /**
+ * On AM restart (for whatever reason) this API is required to rebuild the AM
+ * internal state with the containers which were already assigned and running
+ *
+ * @param liveContainers
+ * @param applicationId
+ * @param providerRoles
+ */
+ void rebuildContainerDetails(List<Container> liveContainers,
+ String applicationId, Map<Integer, ProviderRole> providerRoles);
}
diff --git a/slider-core/src/main/java/org/apache/slider/providers/ProviderUtils.java b/slider-core/src/main/java/org/apache/slider/providers/ProviderUtils.java
index cb7d27a..4b8724a 100644
--- a/slider-core/src/main/java/org/apache/slider/providers/ProviderUtils.java
+++ b/slider-core/src/main/java/org/apache/slider/providers/ProviderUtils.java
@@ -23,6 +23,7 @@
import org.apache.hadoop.yarn.api.ApplicationConstants;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.slider.api.ClusterDescription;
+import org.apache.slider.api.InternalKeys;
import org.apache.slider.api.OptionKeys;
import org.apache.slider.api.ResourceKeys;
import org.apache.slider.api.RoleKeys;
@@ -282,9 +283,9 @@
MapOperations globalOptions =
instanceDefinition.getInternalOperations().getGlobalOptions();
String applicationHome =
- globalOptions.get(OptionKeys.INTERNAL_APPLICATION_HOME);
+ globalOptions.get(InternalKeys.INTERNAL_APPLICATION_HOME);
String imagePath =
- globalOptions.get(OptionKeys.INTERNAL_APPLICATION_IMAGE_PATH);
+ globalOptions.get(InternalKeys.INTERNAL_APPLICATION_IMAGE_PATH);
return buildPathToHomeDir(imagePath, applicationHome, bindir, script);
}
@@ -350,8 +351,8 @@
String script) throws FileNotFoundException {
String homedir = buildPathToHomeDir(
- internal.get(OptionKeys.INTERNAL_APPLICATION_IMAGE_PATH),
- internal.get(OptionKeys.INTERNAL_APPLICATION_HOME),
+ internal.get(InternalKeys.INTERNAL_APPLICATION_IMAGE_PATH),
+ internal.get(InternalKeys.INTERNAL_APPLICATION_HOME),
bindir,
script);
return buildScriptPath(bindir, script, homedir);
@@ -418,7 +419,7 @@
}
log.debug("Found {} entries in {}", ls.length, base);
- List<File> directories = new LinkedList<>();
+ List<File> directories = new LinkedList<File>();
StringBuilder dirs = new StringBuilder();
for (File file : ls) {
log.debug("{}", false);
diff --git a/slider-core/src/main/java/org/apache/slider/providers/agent/AgentClientProvider.java b/slider-core/src/main/java/org/apache/slider/providers/agent/AgentClientProvider.java
index 3835df6..3a1ee76 100644
--- a/slider-core/src/main/java/org/apache/slider/providers/agent/AgentClientProvider.java
+++ b/slider-core/src/main/java/org/apache/slider/providers/agent/AgentClientProvider.java
@@ -21,7 +21,7 @@
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
-import org.apache.slider.api.OptionKeys;
+import org.apache.slider.api.InternalKeys;
import org.apache.slider.api.ResourceKeys;
import org.apache.slider.common.SliderKeys;
import org.apache.slider.common.tools.SliderFileSystem;
@@ -103,7 +103,7 @@
getGlobalOptions().get(AgentKeys.PACKAGE_PATH);
if (SliderUtils.isUnset(appHome)) {
String agentImage = instanceDefinition.getInternalOperations().
- get(OptionKeys.INTERNAL_APPLICATION_IMAGE_PATH);
+ get(InternalKeys.INTERNAL_APPLICATION_IMAGE_PATH);
sliderFileSystem.verifyFileExists(new Path(agentImage));
}
}
@@ -168,12 +168,12 @@
String appHome = instanceDefinition.getAppConfOperations().
getGlobalOptions().get(AgentKeys.PACKAGE_PATH);
String agentImage = instanceDefinition.getInternalOperations().
- get(OptionKeys.INTERNAL_APPLICATION_IMAGE_PATH);
+ get(InternalKeys.INTERNAL_APPLICATION_IMAGE_PATH);
if (SliderUtils.isUnset(appHome) && SliderUtils.isUnset(agentImage)) {
throw new BadConfigException("Either agent package path " +
AgentKeys.PACKAGE_PATH + " or image root " +
- OptionKeys.INTERNAL_APPLICATION_IMAGE_PATH
+ InternalKeys.INTERNAL_APPLICATION_IMAGE_PATH
+ " must be provided.");
}
}
@@ -210,7 +210,7 @@
}
Application application = metainfo.getApplication();
- tags = new HashSet<>();
+ tags = new HashSet<String>();
tags.add("Name: " + application.getName());
tags.add("Version: " + application.getVersion());
tags.add("Description: " + SliderUtils.truncate(application.getComment(), 80));
diff --git a/slider-core/src/main/java/org/apache/slider/providers/agent/AgentKeys.java b/slider-core/src/main/java/org/apache/slider/providers/agent/AgentKeys.java
index 31d09c4..419fa1a 100644
--- a/slider-core/src/main/java/org/apache/slider/providers/agent/AgentKeys.java
+++ b/slider-core/src/main/java/org/apache/slider/providers/agent/AgentKeys.java
@@ -68,6 +68,8 @@
String ARG_HOST = "--host";
String ARG_PORT = "--port";
String ARG_SECURED_PORT = "--secured_port";
+ String ARG_ZOOKEEPER_QUORUM = "--zk-quorum";
+ String ARG_ZOOKEEPER_REGISTRY_PATH = "--zk-reg-path";
String ARG_DEBUG = "--debug";
String AGENT_MAIN_SCRIPT_ROOT = "./infra/agent/slider-agent/";
String AGENT_MAIN_SCRIPT = "agent/main.py";
diff --git a/slider-core/src/main/java/org/apache/slider/providers/agent/AgentLaunchParameter.java b/slider-core/src/main/java/org/apache/slider/providers/agent/AgentLaunchParameter.java
index b839e58..c8b0e1d 100644
--- a/slider-core/src/main/java/org/apache/slider/providers/agent/AgentLaunchParameter.java
+++ b/slider-core/src/main/java/org/apache/slider/providers/agent/AgentLaunchParameter.java
@@ -94,7 +94,7 @@
}
if (trackers == null) {
- trackers = new HashMap<>(10);
+ trackers = new HashMap<String, CommandTracker>(10);
}
String componentName = parameters[0];
CommandTracker tracker = new CommandTracker(Arrays.copyOfRange(parameters, 1, parameters.length));
diff --git a/slider-core/src/main/java/org/apache/slider/providers/agent/AgentProviderService.java b/slider-core/src/main/java/org/apache/slider/providers/agent/AgentProviderService.java
index c1719b7..c7a82d3 100644
--- a/slider-core/src/main/java/org/apache/slider/providers/agent/AgentProviderService.java
+++ b/slider-core/src/main/java/org/apache/slider/providers/agent/AgentProviderService.java
@@ -19,16 +19,19 @@
package org.apache.slider.providers.agent;
import com.google.common.annotations.VisibleForTesting;
+import org.apache.curator.utils.ZKPaths;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.yarn.api.ApplicationConstants;
import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.LocalResourceType;
import org.apache.slider.api.ClusterDescription;
import org.apache.slider.api.ClusterDescriptionKeys;
import org.apache.slider.api.ClusterNode;
+import org.apache.slider.api.InternalKeys;
import org.apache.slider.api.OptionKeys;
import org.apache.slider.api.StatusKeys;
import org.apache.slider.common.SliderKeys;
@@ -38,6 +41,8 @@
import org.apache.slider.core.conf.ConfTreeOperations;
import org.apache.slider.core.conf.MapOperations;
import org.apache.slider.core.exceptions.BadCommandArgumentsException;
+import org.apache.slider.core.exceptions.BadConfigException;
+import org.apache.slider.core.exceptions.NoSuchNodeException;
import org.apache.slider.core.exceptions.SliderException;
import org.apache.slider.core.launch.CommandLineBuilder;
import org.apache.slider.core.launch.ContainerLauncher;
@@ -52,9 +57,16 @@
import org.apache.slider.providers.ProviderUtils;
import org.apache.slider.providers.agent.application.metadata.Application;
import org.apache.slider.providers.agent.application.metadata.Component;
+import org.apache.slider.providers.agent.application.metadata.ComponentExport;
import org.apache.slider.providers.agent.application.metadata.Export;
import org.apache.slider.providers.agent.application.metadata.ExportGroup;
import org.apache.slider.providers.agent.application.metadata.Metainfo;
+import org.apache.slider.providers.agent.application.metadata.OSPackage;
+import org.apache.slider.providers.agent.application.metadata.OSSpecific;
+import org.apache.slider.server.appmaster.actions.ProviderReportedContainerLoss;
+import org.apache.slider.server.appmaster.actions.RegisterComponentInstance;
+import org.apache.slider.server.appmaster.state.ContainerPriority;
+import org.apache.slider.server.appmaster.state.RoleInstance;
import org.apache.slider.server.appmaster.state.StateAccessForProviders;
import org.apache.slider.server.appmaster.web.rest.agent.AgentCommandType;
import org.apache.slider.server.appmaster.web.rest.agent.AgentRestOperations;
@@ -85,8 +97,10 @@
import java.util.List;
import java.util.Locale;
import java.util.Map;
+import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import static org.apache.slider.server.appmaster.web.rest.RestPaths.SLIDER_PATH_AGENTS;
@@ -107,25 +121,37 @@
private static final String CONTAINER_ID = "container_id";
private static final String GLOBAL_CONFIG_TAG = "global";
private static final String LOG_FOLDERS_TAG = "LogFolders";
+ private static final String COMPONENT_DATA_TAG = "ComponentInstanceData";
+ private static final String SHARED_PORT_TAG = "SHARED";
+ private static final String DO_NOT_PROPAGATE_TAG = "{DO_NOT_PROPAGATE}";
private static final int MAX_LOG_ENTRIES = 20;
private static final int DEFAULT_HEARTBEAT_MONITOR_INTERVAL = 60 * 1000;
+
private final Object syncLock = new Object();
- private final Map<String, String> allocatedPorts = new ConcurrentHashMap<>();
private int heartbeatMonitorInterval = 0;
private AgentClientProvider clientProvider;
- private Map<String, ComponentInstanceState> componentStatuses = new ConcurrentHashMap<>();
private AtomicInteger taskId = new AtomicInteger(0);
private volatile Metainfo metainfo = null;
private ComponentCommandOrder commandOrder = null;
private HeartbeatMonitor monitor;
- private Map<String, String> workFolders =
+ private Boolean canAnyMasterPublish = null;
+ private AgentLaunchParameter agentLaunchParameter = null;
+ private String clusterName = null;
+
+ private final Map<String, ComponentInstanceState> componentStatuses =
+ new ConcurrentHashMap<String, ComponentInstanceState>();
+ private final Map<String, Map<String, String>> componentInstanceData =
+ new ConcurrentHashMap<String, Map<String, String>>();
+ private final Map<String, Map<String, String>> exportGroups =
+ new ConcurrentHashMap<String, Map<String, String>>();
+ private final Map<String, Map<String, String>> allocatedPorts =
+ new ConcurrentHashMap<String, Map<String, String>>();
+ private final Map<String, String> workFolders =
Collections.synchronizedMap(new LinkedHashMap<String, String>(MAX_LOG_ENTRIES, 0.75f, false) {
protected boolean removeEldestEntry(Map.Entry eldest) {
return size() > MAX_LOG_ENTRIES;
}
});
- private Boolean canAnyMasterPublish = null;
- private AgentLaunchParameter agentLaunchParameter = null;
/**
* Create an instance of AgentProviderService
@@ -161,6 +187,40 @@
clientProvider.validateInstanceDefinition(instanceDefinition);
}
+ // Reads the metainfo.xml in the application package and loads it
+ private void buildMetainfo(AggregateConf instanceDefinition,
+ SliderFileSystem fileSystem) throws IOException, SliderException {
+ String appDef = instanceDefinition.getAppConfOperations()
+ .getGlobalOptions().getMandatoryOption(AgentKeys.APP_DEF);
+
+ if (metainfo == null) {
+ synchronized (syncLock) {
+ if (metainfo == null) {
+ readAndSetHeartbeatMonitoringInterval(instanceDefinition);
+ initializeAgentDebugCommands(instanceDefinition);
+
+ metainfo = getApplicationMetainfo(fileSystem, appDef);
+ if (metainfo == null || metainfo.getApplication() == null) {
+ log.error("metainfo.xml is unavailable or malformed at {}.", appDef);
+ throw new SliderException(
+ "metainfo.xml is required in app package.");
+ }
+ commandOrder = new ComponentCommandOrder(metainfo.getApplication()
+ .getCommandOrder());
+ monitor = new HeartbeatMonitor(this, getHeartbeatMonitorInterval());
+ monitor.start();
+ }
+ }
+ }
+ }
+
+ @Override
+ public void initializeApplicationConfiguration(
+ AggregateConf instanceDefinition, SliderFileSystem fileSystem)
+ throws IOException, SliderException {
+ buildMetainfo(instanceDefinition, fileSystem);
+ }
+
@Override
public void buildContainerLaunchContext(ContainerLauncher launcher,
AggregateConf instanceDefinition,
@@ -177,24 +237,7 @@
String appDef = instanceDefinition.getAppConfOperations().
getGlobalOptions().getMandatoryOption(AgentKeys.APP_DEF);
- if (metainfo == null) {
- synchronized (syncLock) {
- if (metainfo == null) {
- readAndSetHeartbeatMonitoringInterval(instanceDefinition);
- initializeAgentDebugCommands(instanceDefinition);
-
- metainfo = getApplicationMetainfo(fileSystem, appDef);
- if (metainfo == null || metainfo.getApplication() == null) {
- log.error("metainfo.xml is unavailable or malformed at {}.", appDef);
- throw new SliderException("metainfo.xml is required in app package.");
- }
-
- commandOrder = new ComponentCommandOrder(metainfo.getApplication().getCommandOrder());
- monitor = new HeartbeatMonitor(this, getHeartbeatMonitorInterval());
- monitor.start();
- }
- }
- }
+ initializeApplicationConfiguration(instanceDefinition, fileSystem);
log.info("Build launch context for Agent");
log.debug(instanceDefinition.toString());
@@ -208,7 +251,9 @@
String logDir = ApplicationConstants.Environment.LOG_DIRS.$();
launcher.setEnv("AGENT_LOG_ROOT", logDir);
log.info("AGENT_LOG_ROOT set to {}", logDir);
- launcher.setEnv(HADOOP_USER_NAME, System.getenv(HADOOP_USER_NAME));
+ if (System.getenv(HADOOP_USER_NAME) != null) {
+ launcher.setEnv(HADOOP_USER_NAME, System.getenv(HADOOP_USER_NAME));
+ }
// for 2-Way SSL
launcher.setEnv(SLIDER_PASSPHRASE, SliderKeys.PASSPHRASE);
@@ -222,8 +267,15 @@
scriptPath = new File(appHome, AgentKeys.AGENT_MAIN_SCRIPT).getPath();
}
+ // set PYTHONPATH
+ List<String> pythonPaths = new ArrayList<String>();
+ pythonPaths.add(AgentKeys.AGENT_MAIN_SCRIPT_ROOT);
+ String pythonPath = StringUtils.join(File.pathSeparator, pythonPaths);
+ launcher.setEnv(PYTHONPATH, pythonPath);
+ log.info("PYTHONPATH set to {}", pythonPath);
+
String agentImage = instanceDefinition.getInternalOperations().
- get(OptionKeys.INTERNAL_APPLICATION_IMAGE_PATH);
+ get(InternalKeys.INTERNAL_APPLICATION_IMAGE_PATH);
if (agentImage != null) {
LocalResource agentImageRes = fileSystem.createAmResource(new Path(agentImage), LocalResourceType.ARCHIVE);
launcher.addLocalResource(AgentKeys.AGENT_INSTALL_DIR, agentImageRes);
@@ -260,12 +312,10 @@
operation.add(scriptPath);
operation.add(ARG_LABEL, label);
- operation.add(ARG_HOST);
- operation.add(getClusterInfoPropertyValue(StatusKeys.INFO_AM_HOSTNAME));
- operation.add(ARG_PORT);
- operation.add(getClusterInfoPropertyValue(StatusKeys.INFO_AM_AGENT_PORT));
- operation.add(ARG_SECURED_PORT);
- operation.add(getClusterInfoPropertyValue(StatusKeys.INFO_AM_SECURED_AGENT_PORT));
+ operation.add(ARG_ZOOKEEPER_QUORUM);
+ operation.add(getClusterOptionPropertyValue(OptionKeys.ZOOKEEPER_QUORUM));
+ operation.add(ARG_ZOOKEEPER_REGISTRY_PATH);
+ operation.add(getZkRegistryPath());
String debugCmd = agentLaunchParameter.getNextLaunchParameter(role);
if (debugCmd != null && debugCmd.length() != 0) {
@@ -276,11 +326,303 @@
launcher.addCommand(operation.build());
// initialize the component instance state
- componentStatuses.put(label,
- new ComponentInstanceState(
- role,
- container.getId().toString(),
- getClusterInfoPropertyValue(OptionKeys.APPLICATION_NAME)));
+ getComponentStatuses().put(label,
+ new ComponentInstanceState(
+ role,
+ container.getId(),
+ getClusterInfoPropertyValue(OptionKeys.APPLICATION_NAME)));
+ }
+
+ // build the zookeeper registry path
+ private String getZkRegistryPath() {
+ String zkRegistryRoot = getConfig().get(REGISTRY_PATH,
+ DEFAULT_REGISTRY_PATH);
+ String appType = APP_TYPE;
+ String zkRegistryPath = ZKPaths.makePath(zkRegistryRoot, appType);
+ String clusterName = getAmState().getInternalsSnapshot().get(
+ OptionKeys.APPLICATION_NAME);
+ zkRegistryPath = ZKPaths.makePath(zkRegistryPath, clusterName);
+ return zkRegistryPath;
+ }
+
+ @Override
+ public void rebuildContainerDetails(List<Container> liveContainers,
+ String applicationId, Map<Integer, ProviderRole> providerRoleMap) {
+ for (Container container : liveContainers) {
+ // get the role name and label
+ ProviderRole role = providerRoleMap.get(ContainerPriority
+ .extractRole(container));
+ if (role != null) {
+ String roleName = role.name;
+ String label = getContainerLabel(container, roleName);
+ log.info("Rebuilding in-memory: container {} in role {} in cluster {}",
+ container.getId(), roleName, applicationId);
+ getComponentStatuses().put(
+ label,
+ new ComponentInstanceState(roleName, container.getId(),
+ applicationId));
+ } else {
+ log.warn("Role not found for container {} in cluster {}",
+ container.getId(), applicationId);
+ }
+ }
+ }
+
+ /**
+ * Run this service
+ *
+ * @param instanceDefinition component description
+ * @param confDir local dir with the config
+ * @param env environment variables above those generated by
+ * @param execInProgress callback for the event notification
+ *
+ * @throws IOException IO problems
+ * @throws SliderException anything internal
+ */
+ @Override
+ public boolean exec(AggregateConf instanceDefinition,
+ File confDir,
+ Map<String, String> env,
+ ProviderCompleted execInProgress) throws
+ IOException,
+ SliderException {
+
+ return false;
+ }
+
+ @Override
+ public boolean isSupportedRole(String role) {
+ return true;
+ }
+
+ /**
+ * Handle registration calls from the agents
+ * @param registration
+ * @return
+ */
+ @Override
+ public RegistrationResponse handleRegistration(Register registration) {
+ log.info("Handling registration: " + registration);
+ RegistrationResponse response = new RegistrationResponse();
+ String label = registration.getHostname();
+ State agentState = registration.getActualState();
+ if (getComponentStatuses().containsKey(label)) {
+ response.setResponseStatus(RegistrationStatus.OK);
+ ComponentInstanceState componentStatus = getComponentStatuses().get(label);
+ componentStatus.heartbeat(System.currentTimeMillis());
+ updateComponentStatusWithAgentState(componentStatus, agentState);
+
+ Map<String, String> ports = registration.getAllocatedPorts();
+ if (ports != null && !ports.isEmpty()) {
+ String roleName = getRoleName(label);
+ String containerId = getContainerId(label);
+ processAllocatedPorts(registration.getPublicHostname(), roleName, containerId, ports);
+ }
+ } else {
+ response.setResponseStatus(RegistrationStatus.FAILED);
+ response.setLog("Label not recognized.");
+ log.warn("Received registration request from unknown label {}", label);
+ }
+ log.info("Registration response: " + response);
+ return response;
+ }
+
+ /**
+ * Handle heartbeat response from agents
+ * @param heartBeat
+ * @return
+ */
+ @Override
+ public HeartBeatResponse handleHeartBeat(HeartBeat heartBeat) {
+ log.debug("Handling heartbeat: " + heartBeat);
+ HeartBeatResponse response = new HeartBeatResponse();
+ long id = heartBeat.getResponseId();
+ response.setResponseId(id + 1L);
+
+ String label = heartBeat.getHostname();
+ String roleName = getRoleName(label);
+ String containerId = getContainerId(label);
+
+ StateAccessForProviders accessor = getAmState();
+ String scriptPath = getScriptPathFromMetainfo(roleName);
+
+ if (scriptPath == null) {
+ log.error("role.script is unavailable for " + roleName + ". Commands will not be sent.");
+ return response;
+ }
+
+ if (!getComponentStatuses().containsKey(label)) {
+ return response;
+ }
+
+ Boolean isMaster = isMaster(roleName);
+ ComponentInstanceState componentStatus = getComponentStatuses().get(label);
+ componentStatus.heartbeat(System.currentTimeMillis());
+
+ publishConfigAndExportGroups(heartBeat, componentStatus, roleName);
+
+ List<CommandReport> reports = heartBeat.getReports();
+ if (reports != null && !reports.isEmpty()) {
+ CommandReport report = reports.get(0);
+ Map<String, String> ports = report.getAllocatedPorts();
+ if (ports != null && !ports.isEmpty()) {
+ processAllocatedPorts(heartBeat.getFqdn(), roleName, containerId, ports);
+ }
+ CommandResult result = CommandResult.getCommandResult(report.getStatus());
+ Command command = Command.getCommand(report.getRoleCommand());
+ componentStatus.applyCommandResult(result, command);
+ log.info("Component operation. Status: {}", result);
+
+ if (command == Command.INSTALL && report.getFolders() != null && report.getFolders().size() > 0) {
+ publishLogFolderPaths(report.getFolders(), containerId, heartBeat.getFqdn());
+ }
+ }
+
+ int waitForCount = accessor.getInstanceDefinitionSnapshot().
+ getAppConfOperations().getComponentOptInt(roleName, AgentKeys.WAIT_HEARTBEAT, 0);
+
+ if (id < waitForCount) {
+ log.info("Waiting until heartbeat count {}. Current val: {}", waitForCount, id);
+ getComponentStatuses().put(roleName, componentStatus);
+ return response;
+ }
+
+ Command command = componentStatus.getNextCommand();
+ try {
+ if (Command.NOP != command) {
+ if (command == Command.INSTALL) {
+ log.info("Installing {} on {}.", roleName, containerId);
+ addInstallCommand(roleName, containerId, response, scriptPath);
+ componentStatus.commandIssued(command);
+ } else if (command == Command.START) {
+ // check against dependencies
+ boolean canExecute = commandOrder.canExecute(roleName, command, getComponentStatuses().values());
+ if (canExecute) {
+ log.info("Starting {} on {}.", roleName, containerId);
+ addStartCommand(roleName, containerId, response, scriptPath, isMarkedAutoRestart(roleName));
+ componentStatus.commandIssued(command);
+ } else {
+ log.info("Start of {} on {} delayed as dependencies have not started.", roleName, containerId);
+ }
+ }
+ }
+
+ // if there is no outstanding command then retrieve config
+ if (isMaster && componentStatus.getState() == State.STARTED
+ && command == Command.NOP) {
+ if (!componentStatus.getConfigReported()) {
+ log.info("Requesting applied config for {} on {}.", roleName, containerId);
+ addGetConfigCommand(roleName, containerId, response);
+ }
+ }
+
+ // if restart is required then signal
+ response.setRestartEnabled(false);
+ if (componentStatus.getState() == State.STARTED
+ && command == Command.NOP && isMarkedAutoRestart(roleName)) {
+ response.setRestartEnabled(true);
+ }
+ } catch (SliderException e) {
+ componentStatus.applyCommandResult(CommandResult.FAILED, command);
+ log.warn("Component instance failed operation.", e);
+ }
+
+ log.debug("Heartbeat response: " + response);
+ return response;
+ }
+
+ protected void processAllocatedPorts(String fqdn,
+ String roleName,
+ String containerId,
+ Map<String, String> ports) {
+ RoleInstance instance;
+ try {
+ instance = getAmState().getOwnedContainer(containerId);
+ } catch (NoSuchNodeException e) {
+ log.warn("Failed to locate instance of container {}: {}", containerId, e);
+ instance = null;
+ }
+ for (Map.Entry<String, String> port : ports.entrySet()) {
+ String portname = port.getKey();
+ String portNo = port.getValue();
+ log.info("Recording allocated port for {} as {}", portname, portNo);
+ this.getAllocatedPorts().put(portname, portNo);
+ this.getAllocatedPorts(containerId).put(portname, portNo);
+ if (instance!=null) {
+ try {
+ instance.registerPortEndpoint(Integer.valueOf(portNo), portname, "");
+ } catch (NumberFormatException e) {
+ log.warn("Failed to parse {}: {}", portNo, e);
+ }
+ }
+ }
+
+ // component specific publishes
+ processAndPublishComponentSpecificData(ports, containerId, fqdn, roleName);
+
+ // and update registration entries
+ if (instance != null) {
+ queueAccess.put(new RegisterComponentInstance(instance.getId(), 0,
+ TimeUnit.MILLISECONDS));
+ }
+ }
+
+ private void updateComponentStatusWithAgentState(
+ ComponentInstanceState componentStatus, State agentState) {
+ if (agentState != null) {
+ componentStatus.setState(agentState);
+ }
+ }
+
+ @Override
+ public Map<String, String> buildMonitorDetails(ClusterDescription clusterDesc) {
+ Map<String, String> details = super.buildMonitorDetails(clusterDesc);
+ buildRoleHostDetails(details);
+ return details;
+ }
+
+ @Override
+ public void applyInitialRegistryDefinitions(URL unsecureWebAPI,
+ URL secureWebAPI,
+ ServiceInstanceData instanceData) throws IOException {
+ super.applyInitialRegistryDefinitions(unsecureWebAPI,
+ secureWebAPI,
+ instanceData
+ );
+
+ try {
+ instanceData.internalView.endpoints.put(
+ CustomRegistryConstants.AGENT_REST_API,
+ new RegisteredEndpoint(
+ new URL(secureWebAPI, SLIDER_PATH_AGENTS),
+ "Agent REST API"));
+ } catch (URISyntaxException e) {
+ throw new IOException(e);
+ }
+ }
+
+ @Override
+ public void notifyContainerCompleted(ContainerId containerId) {
+ if (containerId != null) {
+ String containerIdStr = containerId.toString();
+ if (getComponentInstanceData().containsKey(containerIdStr)) {
+ getComponentInstanceData().remove(containerIdStr);
+ log.info("Removing container specific data for {}", containerIdStr);
+ publishComponentInstanceData();
+ }
+
+ if (this.allocatedPorts.containsKey(containerIdStr)) {
+ this.allocatedPorts.remove(containerIdStr);
+ }
+
+ synchronized (this.componentStatuses) {
+ for (String label : getComponentStatuses().keySet()) {
+ if (label.startsWith(containerIdStr)) {
+ getComponentStatuses().remove(label);
+ }
+ }
+ }
+ }
}
/**
@@ -337,14 +679,21 @@
return this.heartbeatMonitorInterval;
}
+ private String getClusterName() {
+ if (clusterName == null || clusterName.length() == 0) {
+ clusterName = getAmState().getInternalsSnapshot().get(OptionKeys.APPLICATION_NAME);
+ }
+ return clusterName;
+ }
+
/**
- * Publish a named config bag that may contain name-value pairs for app configurations such as hbase-site
+ * Publish a named property bag that may contain name-value pairs for app configurations such as hbase-site
* @param name
* @param description
* @param entries
*/
- protected void publishComponentConfiguration(String name, String description,
- Iterable<Map.Entry<String, String>> entries) {
+ protected void publishApplicationInstanceData(String name, String description,
+ Iterable<Map.Entry<String, String>> entries) {
PublishedConfiguration pubconf = new PublishedConfiguration();
pubconf.description = description;
pubconf.putValues(entries);
@@ -374,45 +723,26 @@
return description.getInfo(name);
}
- /**
- * Lost heartbeat from the container - release it and ask for a replacement
- *
- * @param label
- *
- * @return if release is requested successfully
- */
- protected boolean releaseContainer(String label) {
- componentStatuses.remove(label);
- try {
- getAppMaster().refreshContainer(getContainerId(label), true);
- } catch (SliderException e) {
- log.info("Error while requesting container release for {}. Message: {}", label, e.getMessage());
- return false;
- }
-
- return true;
+ protected String getClusterOptionPropertyValue(String name)
+ throws BadConfigException {
+ StateAccessForProviders accessor = getAmState();
+ assert accessor.isApplicationLive();
+ ClusterDescription description = accessor.getClusterStatus();
+ return description.getMandatoryOption(name);
}
/**
- * Run this service
+ * Lost heartbeat from the container - release it and ask for a replacement
+ * (async operation)
+ * @param label
+ * @param containerId
*
- * @param instanceDefinition component description
- * @param confDir local dir with the config
- * @param env environment variables above those generated by
- * @param execInProgress callback for the event notification
- *
- * @throws IOException IO problems
- * @throws SliderException anything internal
*/
- @Override
- public boolean exec(AggregateConf instanceDefinition,
- File confDir,
- Map<String, String> env,
- ProviderCompleted execInProgress) throws
- IOException,
- SliderException {
-
- return false;
+ protected void lostContainer(
+ String label,
+ ContainerId containerId) {
+ getComponentStatuses().remove(label);
+ getQueueAccess().put(new ProviderReportedContainerLoss(containerId));
}
/**
@@ -421,174 +751,88 @@
* @return the provider status - map of entries to add to the info section
*/
public Map<String, String> buildProviderStatus() {
- Map<String, String> stats = new HashMap<>();
+ Map<String, String> stats = new HashMap<String, String>();
return stats;
}
- @Override
- public boolean isSupportedRole(String role) {
- return true;
- }
/**
- * Handle registration calls from the agents
- * @param registration
- * @return
- */
- @Override
- public RegistrationResponse handleRegistration(Register registration) {
- RegistrationResponse response = new RegistrationResponse();
- String label = registration.getHostname();
- if (componentStatuses.containsKey(label)) {
- response.setResponseStatus(RegistrationStatus.OK);
- componentStatuses.get(label).setLastHeartbeat(System.currentTimeMillis());
- } else {
- response.setResponseStatus(RegistrationStatus.FAILED);
- response.setLog("Label not recognized.");
- }
- return response;
- }
-
- /**
- * Handle heartbeat response from agents
- * @param heartBeat
- * @return
- */
- @Override
- public HeartBeatResponse handleHeartBeat(HeartBeat heartBeat) {
- HeartBeatResponse response = new HeartBeatResponse();
- long id = heartBeat.getResponseId();
- response.setResponseId(id + 1L);
-
- String label = heartBeat.getHostname();
- String roleName = getRoleName(label);
-
- String containerId = getContainerId(label);
- StateAccessForProviders accessor = getAmState();
- String scriptPath = getScriptPathFromMetainfo(roleName);
-
- if (scriptPath == null) {
- log.error("role.script is unavailable for " + roleName + ". Commands will not be sent.");
- return response;
- }
-
- if (!componentStatuses.containsKey(label)) {
- return response;
- }
-
- Boolean isMaster = isMaster(roleName);
- ComponentInstanceState componentStatus = componentStatuses.get(label);
- componentStatus.setLastHeartbeat(System.currentTimeMillis());
- // If no Master can explicitly publish then publish if its a master
- // Otherwise, wait till the master that can publish is ready
- if (isMaster &&
- (canAnyMasterPublishConfig() == false || canPublishConfig(roleName))) {
- processReturnedStatus(heartBeat, componentStatus);
- }
-
- List<CommandReport> reports = heartBeat.getReports();
- if (reports != null && !reports.isEmpty()) {
- CommandReport report = reports.get(0);
- Map<String, String> ports = report.getAllocatedPorts();
- if (ports != null && !ports.isEmpty()) {
- for (Map.Entry<String, String> port : ports.entrySet()) {
- log.info("Recording allocated port for {} as {}", port.getKey(), port.getValue());
- this.allocatedPorts.put(port.getKey(), port.getValue());
- }
- }
- CommandResult result = CommandResult.getCommandResult(report.getStatus());
- Command command = Command.getCommand(report.getRoleCommand());
- componentStatus.applyCommandResult(result, command);
- log.info("Component operation. Status: {}", result);
-
- if (command == Command.INSTALL && report.getFolders() != null && report.getFolders().size() > 0) {
- processFolderPaths(report.getFolders(), containerId, heartBeat.getFqdn());
- }
- }
-
- int waitForCount = accessor.getInstanceDefinitionSnapshot().
- getAppConfOperations().getComponentOptInt(roleName, AgentKeys.WAIT_HEARTBEAT, 0);
-
- if (id < waitForCount) {
- log.info("Waiting until heartbeat count {}. Current val: {}", waitForCount, id);
- componentStatuses.put(roleName, componentStatus);
- return response;
- }
-
- Command command = componentStatus.getNextCommand();
- try {
- if (Command.NOP != command) {
- if (command == Command.INSTALL) {
- log.info("Installing {} on {}.", roleName, containerId);
- addInstallCommand(roleName, containerId, response, scriptPath);
- componentStatus.commandIssued(command);
- } else if (command == Command.START) {
- // check against dependencies
- boolean canExecute = commandOrder.canExecute(roleName, command, componentStatuses.values());
- if (canExecute) {
- log.info("Starting {} on {}.", roleName, containerId);
- addStartCommand(roleName, containerId, response, scriptPath);
- componentStatus.commandIssued(command);
- } else {
- log.info("Start of {} on {} delayed as dependencies have not started.", roleName, containerId);
- }
- }
- }
- // if there is no outstanding command then retrieve config
- if (isMaster && componentStatus.getState() == State.STARTED
- && command == Command.NOP) {
- if (!componentStatus.getConfigReported()) {
- addGetConfigCommand(roleName, containerId, response);
- }
- }
- } catch (SliderException e) {
- componentStatus.applyCommandResult(CommandResult.FAILED, command);
- log.warn("Component instance failed operation.", e);
- }
-
- return response;
- }
-
- /**
- * Format the folder locations before publishing in the registry service
+ * Format the folder locations and publish in the registry service
* @param folders
* @param containerId
* @param hostFqdn
*/
- private void processFolderPaths(Map<String, String> folders, String containerId, String hostFqdn) {
+ private void publishLogFolderPaths(Map<String, String> folders, String containerId, String hostFqdn) {
for (String key : folders.keySet()) {
workFolders.put(String.format("%s-%s-%s", hostFqdn, containerId, key), folders.get(key));
}
- publishComponentConfiguration(LOG_FOLDERS_TAG, LOG_FOLDERS_TAG, (new HashMap<>(this.workFolders)).entrySet());
+ publishApplicationInstanceData(LOG_FOLDERS_TAG, LOG_FOLDERS_TAG,
+ (new HashMap<String, String>(this.workFolders)).entrySet());
}
+
/**
* Process return status for component instances
+ *
* @param heartBeat
* @param componentStatus
*/
- protected void processReturnedStatus(HeartBeat heartBeat, ComponentInstanceState componentStatus) {
+ protected void publishConfigAndExportGroups(
+ HeartBeat heartBeat, ComponentInstanceState componentStatus, String roleName) {
List<ComponentStatus> statuses = heartBeat.getComponentStatus();
if (statuses != null && !statuses.isEmpty()) {
log.info("Processing {} status reports.", statuses.size());
for (ComponentStatus status : statuses) {
log.info("Status report: " + status.toString());
+
if (status.getConfigs() != null) {
- for (String key : status.getConfigs().keySet()) {
- Map<String, String> configs = status.getConfigs().get(key);
- publishComponentConfiguration(key, key, configs.entrySet());
+ Application application = getMetainfo().getApplication();
+
+ if (canAnyMasterPublishConfig() == false || canPublishConfig(roleName)) {
+ // If no Master can explicitly publish then publish if its a master
+ // Otherwise, wait till the master that can publish is ready
+
+ Set<String> exportedConfigs = new HashSet();
+ String exportedConfigsStr = application.getExportedConfigs();
+ boolean exportedAllConfigs = exportedConfigsStr == null || exportedConfigsStr.isEmpty();
+ if (!exportedAllConfigs) {
+ for (String exportedConfig : exportedConfigsStr.split(",")) {
+ if (exportedConfig.trim().length() > 0) {
+ exportedConfigs.add(exportedConfig.trim());
+ }
+ }
+ }
+
+ for (String key : status.getConfigs().keySet()) {
+ if ((!exportedAllConfigs && exportedConfigs.contains(key)) ||
+ exportedAllConfigs) {
+ Map<String, String> configs = status.getConfigs().get(key);
+ publishApplicationInstanceData(key, key, configs.entrySet());
+ }
+ }
}
- Application application = getMetainfo().getApplication();
List<ExportGroup> exportGroups = application.getExportGroups();
- if (exportGroups != null && !exportGroups.isEmpty()) {
+ boolean hasExportGroups = exportGroups != null && !exportGroups.isEmpty();
+ Set<String> appExports = new HashSet();
+ String appExportsStr = getApplicationComponent(roleName).getAppExports();
+ boolean hasNoAppExports = appExportsStr == null || appExportsStr.isEmpty();
+ if (!hasNoAppExports) {
+ for (String appExport : appExportsStr.split(",")) {
+ if (appExport.trim().length() > 0) {
+ appExports.add(appExport.trim());
+ }
+ }
+ }
+
+ if (hasExportGroups && appExports.size() > 0) {
String configKeyFormat = "${site.%s.%s}";
String hostKeyFormat = "${%s_HOST}";
// publish export groups if any
- Map<String, String> replaceTokens = new HashMap<>();
+ Map<String, String> replaceTokens = new HashMap<String, String>();
for (Map.Entry<String, Map<String, ClusterNode>> entry : getRoleClusterNodeMapping().entrySet()) {
String hostName = getHostsList(entry.getValue().values(), true).iterator().next();
replaceTokens.put(String.format(hostKeyFormat, entry.getKey().toUpperCase(Locale.ENGLISH)), hostName);
@@ -602,32 +846,146 @@
}
}
+ Set<String> modifiedGroups = new HashSet<String>();
for (ExportGroup exportGroup : exportGroups) {
List<Export> exports = exportGroup.getExports();
if (exports != null && !exports.isEmpty()) {
String exportGroupName = exportGroup.getName();
- Map<String, String> map = new HashMap<>();
+ Map<String, String> map = getCurrentExports(exportGroupName);
for (Export export : exports) {
- String value = export.getValue();
- // replace host names
- for (String token : replaceTokens.keySet()) {
- if (value.contains(token)) {
- value = value.replace(token, replaceTokens.get(token));
+ if (canBeExported(exportGroupName, export.getName(), appExports)) {
+ String value = export.getValue();
+ // replace host names
+ for (String token : replaceTokens.keySet()) {
+ if (value.contains(token)) {
+ value = value.replace(token, replaceTokens.get(token));
+ }
}
+ map.put(export.getName(), value);
+ log.info("Preparing to publish. Key {} and Value {}", export.getName(), value);
}
- map.put(export.getName(), value);
- log.info("Preparing to publish. Key {} and Value {}", export.getName(), value);
}
- publishComponentConfiguration(exportGroupName, exportGroupName, map.entrySet());
+ modifiedGroups.add(exportGroupName);
}
}
+ publishModifiedExportGroups(modifiedGroups);
}
+
+ log.info("Received and processed config for {}", heartBeat.getHostname());
componentStatus.setConfigReported(true);
+
}
}
}
}
+ private boolean canBeExported(String exportGroupName, String name, Set<String> appExports) {
+ return appExports.contains(String.format("%s-%s", exportGroupName, name));
+ }
+
+ protected Map<String, String> getCurrentExports(String groupName) {
+ if(!this.exportGroups.containsKey(groupName)) {
+ synchronized (this.exportGroups) {
+ if(!this.exportGroups.containsKey(groupName)) {
+ this.exportGroups.put(groupName, new ConcurrentHashMap<String, String>());
+ }
+ }
+ }
+
+ return this.exportGroups.get(groupName);
+ }
+
+ private void publishModifiedExportGroups(Set<String> modifiedGroups) {
+ synchronized (this.exportGroups) {
+ for(String groupName : modifiedGroups) {
+ publishApplicationInstanceData(groupName, groupName, this.exportGroups.get(groupName).entrySet());
+ }
+ }
+ }
+
+ /** Publish component instance specific data if the component demands it */
+ protected void processAndPublishComponentSpecificData(Map<String, String> ports,
+ String containerId,
+ String hostFqdn,
+ String roleName) {
+ String portVarFormat = "${site.%s}";
+ String hostNamePattern = "${THIS_HOST}";
+ Map<String, String> toPublish = new HashMap<String, String>();
+
+ Application application = getMetainfo().getApplication();
+ for (Component component : application.getComponents()) {
+ if (component.getName().equals(roleName)) {
+ if (component.getComponentExports().size() > 0) {
+
+ for (ComponentExport export : component.getComponentExports()) {
+ String templateToExport = export.getValue();
+ for (String portName : ports.keySet()) {
+ boolean publishData = false;
+ String portValPattern = String.format(portVarFormat, portName);
+ if (templateToExport.contains(portValPattern)) {
+ templateToExport = templateToExport.replace(portValPattern, ports.get(portName));
+ publishData = true;
+ }
+ if (templateToExport.contains(hostNamePattern)) {
+ templateToExport = templateToExport.replace(hostNamePattern, hostFqdn);
+ publishData = true;
+ }
+ if (publishData) {
+ toPublish.put(export.getName(), templateToExport);
+ log.info("Publishing {} for name {} and container {}",
+ templateToExport, export.getName(), containerId);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (toPublish.size() > 0) {
+ Map<String, String> perContainerData = null;
+ if (!getComponentInstanceData().containsKey(containerId)) {
+ perContainerData = new ConcurrentHashMap<String, String>();
+ } else {
+ perContainerData = getComponentInstanceData().get(containerId);
+ }
+ perContainerData.putAll(toPublish);
+ getComponentInstanceData().put(containerId, perContainerData);
+ publishComponentInstanceData();
+ }
+ }
+
+ private void publishComponentInstanceData() {
+ Map<String, String> dataToPublish = new HashMap<String, String>();
+ synchronized (this.componentInstanceData) {
+ for (String container : getComponentInstanceData().keySet()) {
+ for (String prop : getComponentInstanceData().get(container).keySet()) {
+ dataToPublish.put(
+ container + "." + prop, getComponentInstanceData().get(container).get(prop));
+ }
+ }
+ }
+ publishApplicationInstanceData(COMPONENT_DATA_TAG, COMPONENT_DATA_TAG, dataToPublish.entrySet());
+ }
+
+ /**
+ * Return the Component definition matching the given role name.
+ * @param roleName name of the role/component to look up
+ * @return the matching Component, or null if none is found
+ */
+ protected Component getApplicationComponent(String roleName) {
+ Application application = getMetainfo().getApplication();
+ if (application == null) {
+ log.error("Malformed app definition: Expect application as the top level element for metainfo.xml");
+ } else {
+ for (Component component : application.getComponents()) {
+ if (component.getName().equals(roleName)) {
+ return component;
+ }
+ }
+ }
+ return null;
+ }
+
/**
* Extract script path from the application metainfo
*
@@ -636,19 +994,11 @@
* @return
*/
protected String getScriptPathFromMetainfo(String roleName) {
- String scriptPath = null;
- Application application = getMetainfo().getApplication();
- if (application == null) {
- log.error("Malformed app definition: Expect application as the top level element for metainfo.xml");
- return scriptPath;
+ Component component = getApplicationComponent(roleName);
+ if (component != null) {
+ return component.getCommandScript().getScript();
}
- for (Component component : application.getComponents()) {
- if (component.getName().equals(roleName)) {
- scriptPath = component.getCommandScript().getScript();
- break;
- }
- }
- return scriptPath;
+ return null;
}
/**
@@ -659,18 +1009,10 @@
* @return
*/
protected boolean isMaster(String roleName) {
- Application application = getMetainfo().getApplication();
- if (application == null) {
- log.error("Malformed app definition: Expect application as the top level element for metainfo.xml");
- } else {
- for (Component component : application.getComponents()) {
- if (component.getName().equals(roleName)) {
- if (component.getCategory().equals("MASTER")) {
- return true;
- } else {
- return false;
- }
- }
+ Component component = getApplicationComponent(roleName);
+ if (component != null) {
+ if (component.getCategory().equals("MASTER")) {
+ return true;
}
}
return false;
@@ -684,15 +1026,24 @@
* @return
*/
protected boolean canPublishConfig(String roleName) {
- Application application = getMetainfo().getApplication();
- if (application == null) {
- log.error("Malformed app definition: Expect application as the top level element for metainfo.xml");
- } else {
- for (Component component : application.getComponents()) {
- if (component.getName().equals(roleName)) {
- return Boolean.TRUE.toString().equals(component.getPublishConfig());
- }
- }
+ Component component = getApplicationComponent(roleName);
+ if (component != null) {
+ return Boolean.TRUE.toString().equals(component.getPublishConfig());
+ }
+ return false;
+ }
+
+ /**
+ * Checks if the role is marked auto-restart
+ *
+ * @param roleName name of the role/component to check
+ *
+ * @return true if the component requires auto-restart, false otherwise
+ */
+ protected boolean isMarkedAutoRestart(String roleName) {
+ Component component = getApplicationComponent(roleName);
+ if (component != null) {
+ return component.getRequiresAutoRestart();
}
return false;
}
@@ -743,26 +1094,22 @@
throws SliderException {
assert getAmState().isApplicationLive();
ConfTreeOperations appConf = getAmState().getAppConfSnapshot();
- ConfTreeOperations resourcesConf = getAmState().getResourcesSnapshot();
- ConfTreeOperations internalsConf = getAmState().getInternalsSnapshot();
ExecutionCommand cmd = new ExecutionCommand(AgentCommandType.EXECUTION_COMMAND);
prepareExecutionCommand(cmd);
- String clusterName = internalsConf.get(OptionKeys.APPLICATION_NAME);
+ String clusterName = getClusterName();
cmd.setClusterName(clusterName);
cmd.setRoleCommand(Command.INSTALL.toString());
cmd.setServiceName(clusterName);
cmd.setComponentName(roleName);
cmd.setRole(roleName);
- Map<String, String> hostLevelParams = new TreeMap<>();
+ Map<String, String> hostLevelParams = new TreeMap<String, String>();
hostLevelParams.put(JAVA_HOME, appConf.getGlobalOptions().getMandatoryOption(JAVA_HOME));
- hostLevelParams.put(PACKAGE_LIST, "[{\"type\":\"tarball\",\"name\":\"" +
- appConf.getGlobalOptions().getMandatoryOption(
- PACKAGE_LIST) + "\"}]");
+ hostLevelParams.put(PACKAGE_LIST, getPackageList());
hostLevelParams.put(CONTAINER_ID, containerId);
cmd.setHostLevelParams(hostLevelParams);
- setInstallCommandConfigurations(cmd);
+ setInstallCommandConfigurations(cmd, containerId);
cmd.setCommandParams(setCommandParameters(scriptPath, false));
@@ -770,13 +1117,38 @@
response.addExecutionCommand(cmd);
}
+ private String getPackageList() {
+ String pkgFormatString = "{\"type\":\"%s\",\"name\":\"%s\"}";
+ String pkgListFormatString = "[%s]";
+ List<String> packages = new ArrayList();
+ Application application = getMetainfo().getApplication();
+ if (application != null) {
+ List<OSSpecific> osSpecifics = application.getOSSpecifics();
+ if (osSpecifics != null && osSpecifics.size() > 0) {
+ for (OSSpecific osSpecific : osSpecifics) {
+ if (osSpecific.getOsType().equals("any")) {
+ for (OSPackage osPackage : osSpecific.getPackages()) {
+ packages.add(String.format(pkgFormatString, osPackage.getType(), osPackage.getName()));
+ }
+ }
+ }
+ }
+ }
+
+ if (packages.size() > 0) {
+ return String.format(pkgListFormatString, StringUtils.join(",", packages));
+ } else {
+ return "[]";
+ }
+ }
+
private void prepareExecutionCommand(ExecutionCommand cmd) {
cmd.setTaskId(taskId.incrementAndGet());
cmd.setCommandId(cmd.getTaskId() + "-1");
}
private Map<String, String> setCommandParameters(String scriptPath, boolean recordConfig) {
- Map<String, String> cmdParams = new TreeMap<>();
+ Map<String, String> cmdParams = new TreeMap<String, String>();
cmdParams.put("service_package_folder",
"${AGENT_WORK_ROOT}/work/app/definition/package");
cmdParams.put("script", scriptPath);
@@ -787,9 +1159,9 @@
return cmdParams;
}
- private void setInstallCommandConfigurations(ExecutionCommand cmd) throws SliderException {
+ private void setInstallCommandConfigurations(ExecutionCommand cmd, String containerId) throws SliderException {
ConfTreeOperations appConf = getAmState().getAppConfSnapshot();
- Map<String, Map<String, String>> configurations = buildCommandConfigurations(appConf);
+ Map<String, Map<String, String>> configurations = buildCommandConfigurations(appConf, containerId);
cmd.setConfigurations(configurations);
}
@@ -798,10 +1170,9 @@
throws SliderException {
assert getAmState().isApplicationLive();
ConfTreeOperations appConf = getAmState().getAppConfSnapshot();
- ConfTreeOperations internalsConf = getAmState().getInternalsSnapshot();
StatusCommand cmd = new StatusCommand();
- String clusterName = internalsConf.get(OptionKeys.APPLICATION_NAME);
+ String clusterName = getClusterName();
cmd.setCommandType(AgentCommandType.STATUS_COMMAND);
cmd.setComponentName(roleName);
@@ -809,14 +1180,14 @@
cmd.setClusterName(clusterName);
cmd.setRoleCommand(StatusCommand.STATUS_COMMAND);
- Map<String, String> hostLevelParams = new TreeMap<>();
+ Map<String, String> hostLevelParams = new TreeMap<String, String>();
hostLevelParams.put(JAVA_HOME, appConf.getGlobalOptions().getMandatoryOption(JAVA_HOME));
hostLevelParams.put(CONTAINER_ID, containerId);
cmd.setHostLevelParams(hostLevelParams);
cmd.setCommandParams(setCommandParameters(scriptPath, false));
- Map<String, Map<String, String>> configurations = buildCommandConfigurations(appConf);
+ Map<String, Map<String, String>> configurations = buildCommandConfigurations(appConf, containerId);
cmd.setConfigurations(configurations);
@@ -827,17 +1198,16 @@
protected void addGetConfigCommand(String roleName, String containerId, HeartBeatResponse response)
throws SliderException {
assert getAmState().isApplicationLive();
- ConfTreeOperations internalsConf = getAmState().getInternalsSnapshot();
StatusCommand cmd = new StatusCommand();
- String clusterName = internalsConf.get(OptionKeys.APPLICATION_NAME);
+ String clusterName = getClusterName();
cmd.setCommandType(AgentCommandType.STATUS_COMMAND);
cmd.setComponentName(roleName);
cmd.setServiceName(clusterName);
cmd.setClusterName(clusterName);
cmd.setRoleCommand(StatusCommand.GET_CONFIG_COMMAND);
- Map<String, String> hostLevelParams = new TreeMap<>();
+ Map<String, String> hostLevelParams = new TreeMap<String, String>();
hostLevelParams.put(CONTAINER_ID, containerId);
cmd.setHostLevelParams(hostLevelParams);
@@ -847,7 +1217,8 @@
}
@VisibleForTesting
- protected void addStartCommand(String roleName, String containerId, HeartBeatResponse response, String scriptPath)
+ protected void addStartCommand(String roleName, String containerId, HeartBeatResponse response,
+ String scriptPath, boolean isMarkedAutoRestart)
throws
SliderException {
assert getAmState().isApplicationLive();
@@ -864,27 +1235,49 @@
cmd.setServiceName(clusterName);
cmd.setComponentName(roleName);
cmd.setRole(roleName);
- Map<String, String> hostLevelParams = new TreeMap<>();
+ Map<String, String> hostLevelParams = new TreeMap<String, String>();
hostLevelParams.put(JAVA_HOME, appConf.getGlobalOptions().getMandatoryOption(JAVA_HOME));
hostLevelParams.put(CONTAINER_ID, containerId);
cmd.setHostLevelParams(hostLevelParams);
+ Map<String, String> roleParams = new TreeMap<String, String>();
+ cmd.setRoleParams(roleParams);
+ cmd.getRoleParams().put("auto_restart", Boolean.toString(isMarkedAutoRestart));
+
cmd.setCommandParams(setCommandParameters(scriptPath, true));
- Map<String, Map<String, String>> configurations = buildCommandConfigurations(appConf);
+ Map<String, Map<String, String>> configurations = buildCommandConfigurations(appConf, containerId);
cmd.setConfigurations(configurations);
response.addExecutionCommand(cmd);
}
protected Map<String, String> getAllocatedPorts() {
- return this.allocatedPorts;
+ return getAllocatedPorts(SHARED_PORT_TAG);
}
- private Map<String, Map<String, String>> buildCommandConfigurations(ConfTreeOperations appConf)
+ protected Map<String, Map<String, String>> getComponentInstanceData() {
+ return this.componentInstanceData;
+ }
+
+ protected Map<String, String> getAllocatedPorts(String containerId) {
+ if (!this.allocatedPorts.containsKey(containerId)) {
+ synchronized (this.allocatedPorts) {
+ if (!this.allocatedPorts.containsKey(containerId)) {
+ this.allocatedPorts.put(containerId,
+ new ConcurrentHashMap<String, String>());
+ }
+ }
+ }
+ return this.allocatedPorts.get(containerId);
+ }
+
+ private Map<String, Map<String, String>> buildCommandConfigurations(
+ ConfTreeOperations appConf, String containerId)
throws SliderException {
- Map<String, Map<String, String>> configurations = new TreeMap<>();
+ Map<String, Map<String, String>> configurations =
+ new TreeMap<String, Map<String, String>>();
Map<String, String> tokens = getStandardTokenMap(appConf);
List<String> configs = getApplicationConfigurationTypes(appConf);
@@ -892,14 +1285,14 @@
//Add global
for (String configType : configs) {
addNamedConfiguration(configType, appConf.getGlobalOptions().options,
- configurations, tokens);
+ configurations, tokens, containerId);
}
return configurations;
}
private Map<String, String> getStandardTokenMap(ConfTreeOperations appConf) throws SliderException {
- Map<String, String> tokens = new HashMap<>();
+ Map<String, String> tokens = new HashMap<String, String>();
String nnuri = appConf.get("site.fs.defaultFS");
tokens.put("${NN_URI}", nnuri);
tokens.put("${NN_HOST}", URI.create(nnuri).getHost());
@@ -908,31 +1301,32 @@
tokens.put("${DEFAULT_DATA_DIR}", getAmState()
.getInternalsSnapshot()
.getGlobalOptions()
- .getMandatoryOption(OptionKeys.INTERNAL_DATA_DIR_PATH));
+ .getMandatoryOption(InternalKeys.INTERNAL_DATA_DIR_PATH));
return tokens;
}
private List<String> getApplicationConfigurationTypes(ConfTreeOperations appConf) {
// for now, reading this from appConf. In the future, modify this method to
// process metainfo.xml
- List<String> configList = new ArrayList<>();
+ List<String> configList = new ArrayList<String>();
configList.add(GLOBAL_CONFIG_TAG);
String configTypes = appConf.get("config_types");
- String[] configs = configTypes.split(",");
-
- configList.addAll(Arrays.asList(configs));
+ if (configTypes != null && configTypes.length() > 0) {
+ String[] configs = configTypes.split(",");
+ configList.addAll(Arrays.asList(configs));
+ }
// remove duplicates. mostly worried about 'global' being listed
- return new ArrayList<>(new HashSet<>(configList));
+ return new ArrayList<String>(new HashSet<String>(configList));
}
private void addNamedConfiguration(String configName, Map<String, String> sourceConfig,
Map<String, Map<String, String>> configurations,
- Map<String, String> tokens) {
- Map<String, String> config = new HashMap<>();
+ Map<String, String> tokens, String containerId) {
+ Map<String, String> config = new HashMap<String, String>();
if (configName.equals(GLOBAL_CONFIG_TAG)) {
- addDefaultGlobalConfig(config);
+ addDefaultGlobalConfig(config, containerId);
}
// add role hosts to tokens
addRoleRelatedTokens(tokens);
@@ -941,8 +1335,18 @@
//apply any port updates
if (!this.getAllocatedPorts().isEmpty()) {
for (String key : config.keySet()) {
- if (this.getAllocatedPorts().containsKey(key)) {
- config.put(key, getAllocatedPorts().get(key));
+ String value = config.get(key);
+ String lookupKey = configName + "." + key;
+ if(!value.contains(DO_NOT_PROPAGATE_TAG)) {
+ // If the config property is shared then pass on the already allocated value
+ // from any container
+ if (this.getAllocatedPorts().containsKey(lookupKey)) {
+ config.put(key, getAllocatedPorts().get(lookupKey));
+ }
+ } else {
+ if (this.getAllocatedPorts(containerId).containsKey(lookupKey)) {
+ config.put(key, getAllocatedPorts(containerId).get(lookupKey));
+ }
}
}
}
@@ -959,7 +1363,7 @@
private Iterable<String> getHostsList(Collection<ClusterNode> values,
boolean hostOnly) {
- List<String> hosts = new ArrayList<>();
+ List<String> hosts = new ArrayList<String>();
for (ClusterNode cn : values) {
hosts.add(hostOnly ? cn.host : cn.host + "/" + cn.name);
}
@@ -967,17 +1371,11 @@
return hosts;
}
- private void addDefaultGlobalConfig(Map<String, String> config) {
- config.put("app_log_dir", "${AGENT_LOG_ROOT}/app/log");
+ private void addDefaultGlobalConfig(Map<String, String> config, String containerId) {
+ config.put("app_log_dir", "${AGENT_LOG_ROOT}");
config.put("app_pid_dir", "${AGENT_WORK_ROOT}/app/run");
config.put("app_install_dir", "${AGENT_WORK_ROOT}/app/install");
- }
-
- @Override
- public Map<String, String> buildMonitorDetails(ClusterDescription clusterDesc) {
- Map<String, String> details = super.buildMonitorDetails(clusterDesc);
- buildRoleHostDetails(details);
- return details;
+ config.put("app_container_id", containerId);
}
private void buildRoleHostDetails(Map<String, String> details) {
@@ -988,25 +1386,4 @@
"");
}
}
-
- @Override
- public void applyInitialRegistryDefinitions(URL unsecureWebAPI,
- URL secureWebAPI,
- ServiceInstanceData instanceData) throws IOException {
- super.applyInitialRegistryDefinitions(unsecureWebAPI,
- secureWebAPI,
- instanceData
- );
-
- try {
- instanceData.internalView.endpoints.put(
- CustomRegistryConstants.AGENT_REST_API,
- new RegisteredEndpoint(
- new URL(secureWebAPI, SLIDER_PATH_AGENTS),
- "Agent REST API"));
- } catch (URISyntaxException e) {
- throw new IOException(e);
- }
-
- }
}
diff --git a/slider-core/src/main/java/org/apache/slider/providers/agent/ComponentCommandOrder.java b/slider-core/src/main/java/org/apache/slider/providers/agent/ComponentCommandOrder.java
index 0dce4bb..f4ace5f 100644
--- a/slider-core/src/main/java/org/apache/slider/providers/agent/ComponentCommandOrder.java
+++ b/slider-core/src/main/java/org/apache/slider/providers/agent/ComponentCommandOrder.java
@@ -37,7 +37,8 @@
public static final Logger log =
LoggerFactory.getLogger(ComponentCommandOrder.class);
private static char SPLIT_CHAR = '-';
- Map<Command, Map<String, List<ComponentState>>> dependencies = new HashMap<>();
+ Map<Command, Map<String, List<ComponentState>>> dependencies =
+ new HashMap<Command, Map<String, List<ComponentState>>>();
public ComponentCommandOrder(List<CommandOrder> commandOrders) {
if (commandOrders != null && commandOrders.size() > 0) {
@@ -48,13 +49,13 @@
if (requiredStates.size() > 0) {
Map<String, List<ComponentState>> compDep = dependencies.get(componentCmd.command);
if (compDep == null) {
- compDep = new HashMap<>();
+ compDep = new HashMap<String, List<ComponentState>>();
dependencies.put(componentCmd.command, compDep);
}
List<ComponentState> requirements = compDep.get(componentCmd.componentName);
if (requirements == null) {
- requirements = new ArrayList<>();
+ requirements = new ArrayList<ComponentState>();
compDep.put(componentCmd.componentName, requirements);
}
@@ -70,7 +71,7 @@
}
String[] componentStates = requires.split(",");
- List<ComponentState> retList = new ArrayList<>();
+ List<ComponentState> retList = new ArrayList<ComponentState>();
for (String componentStateStr : componentStates) {
retList.add(getComponentState(componentStateStr));
}
@@ -126,18 +127,18 @@
for (ComponentState stateToMatch : required) {
for (ComponentInstanceState currState : currentStates) {
log.debug("Checking schedule {} {} against dependency {} is {}",
- component, command, currState.getCompName(), currState.getState());
- if (currState.getCompName().equals(stateToMatch.componentName)) {
+ component, command, currState.getComponentName(), currState.getState());
+ if (currState.getComponentName().equals(stateToMatch.componentName)) {
if (currState.getState() != stateToMatch.state) {
if (stateToMatch.state == State.STARTED) {
log.info("Cannot schedule {} {} as dependency {} is {}",
- component, command, currState.getCompName(), currState.getState());
+ component, command, currState.getComponentName(), currState.getState());
canExecute = false;
} else {
//state is INSTALLED
if (currState.getState() != State.STARTING && currState.getState() != State.STARTED) {
log.info("Cannot schedule {} {} as dependency {} is {}",
- component, command, currState.getCompName(), currState.getState());
+ component, command, currState.getComponentName(), currState.getState());
canExecute = false;
}
}
diff --git a/slider-core/src/main/java/org/apache/slider/providers/agent/ComponentInstanceState.java b/slider-core/src/main/java/org/apache/slider/providers/agent/ComponentInstanceState.java
index 60a6f82..f7f8bf4 100644
--- a/slider-core/src/main/java/org/apache/slider/providers/agent/ComponentInstanceState.java
+++ b/slider-core/src/main/java/org/apache/slider/providers/agent/ComponentInstanceState.java
@@ -19,6 +19,8 @@
package org.apache.slider.providers.agent;
import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import org.apache.hadoop.yarn.api.records.ContainerId;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -30,8 +32,9 @@
private static String INVALID_TRANSITION_ERROR =
"Result {0} for command {1} is not expected for component {2} in state {3}.";
- private final String compName;
- private final String containerId;
+ private final String componentName;
+ private final ContainerId containerId;
+ private final String containerIdAsString;
private final String applicationId;
private State state = State.INIT;
private State targetState = State.STARTED;
@@ -40,18 +43,19 @@
private long lastHeartbeat = 0;
private ContainerState containerState;
- public ComponentInstanceState(String compName,
- String containerId,
- String applicationId) {
- this.compName = compName;
+ public ComponentInstanceState(String componentName,
+ ContainerId containerId,
+ String applicationId) {
+ this.componentName = componentName;
this.containerId = containerId;
+ this.containerIdAsString = containerId.toString();
this.applicationId = applicationId;
this.containerState = ContainerState.INIT;
this.lastHeartbeat = System.currentTimeMillis();
}
- public String getCompName() {
- return compName;
+ public String getComponentName() {
+ return componentName;
}
public Boolean getConfigReported() {
@@ -74,20 +78,32 @@
return lastHeartbeat;
}
- public void setLastHeartbeat(long lastHeartbeat) {
- this.lastHeartbeat = lastHeartbeat;
- if(this.containerState == ContainerState.UNHEALTHY ||
- this.containerState == ContainerState.INIT) {
- this.containerState = ContainerState.HEALTHY;
+ /**
+ * Update the heartbeat, and change container state
+ * to mark as healthy if appropriate
+ * @param heartbeatTime last time the heartbeat was seen
+ * @return the current container state
+ */
+ public ContainerState heartbeat(long heartbeatTime) {
+ this.lastHeartbeat = heartbeatTime;
+ if(containerState == ContainerState.UNHEALTHY ||
+ containerState == ContainerState.INIT) {
+ containerState = ContainerState.HEALTHY;
}
+ return containerState;
+ }
+
+
+ public ContainerId getContainerId() {
+ return containerId;
}
public void commandIssued(Command command) {
Command expected = getNextCommand();
if (expected != command) {
- throw new IllegalArgumentException("Command " + command + " is not allowed is state " + state);
+ throw new IllegalArgumentException("Command " + command + " is not allowed in state " + state);
}
- this.state = this.state.getNextState(command);
+ state = state.getNextState(command);
}
public void applyCommandResult(CommandResult result, Command command) {
@@ -101,12 +117,12 @@
} else if (result == CommandResult.COMPLETED) {
failuresSeen = 0;
}
- this.state = this.state.getNextState(result);
+ state = state.getNextState(result);
} catch (IllegalArgumentException e) {
String message = String.format(INVALID_TRANSITION_ERROR,
result.toString(),
command.toString(),
- compName,
+ componentName,
state.toString());
log.warn(message);
throw new IllegalStateException(message);
@@ -114,8 +130,8 @@
}
public boolean hasPendingCommand() {
- if (this.state.canIssueCommands() &&
- this.state != this.targetState &&
+ if (state.canIssueCommands() &&
+ state != targetState &&
failuresSeen < MAX_FAILURE_TOLERATED) {
return true;
}
@@ -144,8 +160,8 @@
public int hashCode() {
int hashCode = 1;
- hashCode = hashCode ^ (compName != null ? compName.hashCode() : 0);
- hashCode = hashCode ^ (containerId != null ? containerId.hashCode() : 0);
+ hashCode = hashCode ^ (componentName != null ? componentName.hashCode() : 0);
+ hashCode = hashCode ^ (containerIdAsString != null ? containerIdAsString.hashCode() : 0);
hashCode = hashCode ^ (applicationId != null ? applicationId.hashCode() : 0);
return hashCode;
}
@@ -158,13 +174,13 @@
ComponentInstanceState that = (ComponentInstanceState) o;
- if (this.compName != null ?
- !this.compName.equals(that.compName) : this.compName != null) {
+ if (this.componentName != null ?
+ !this.componentName.equals(that.componentName) : this.componentName != null) {
return false;
}
- if (this.containerId != null ?
- !this.containerId.equals(that.containerId) : this.containerId != null) {
+ if (this.containerIdAsString != null ?
+ !this.containerIdAsString.equals(that.containerIdAsString) : this.containerIdAsString != null) {
return false;
}
@@ -175,4 +191,18 @@
return true;
}
+
+ @Override
+ public String toString() {
+ final StringBuilder sb =
+ new StringBuilder("ComponentInstanceState{");
+ sb.append("containerIdAsString='").append(containerIdAsString).append('\'');
+ sb.append(", state=").append(state);
+ sb.append(", failuresSeen=").append(failuresSeen);
+ sb.append(", lastHeartbeat=").append(lastHeartbeat);
+ sb.append(", containerState=").append(containerState);
+ sb.append(", componentName='").append(componentName).append('\'');
+ sb.append('}');
+ return sb.toString();
+ }
}
diff --git a/slider-core/src/main/java/org/apache/slider/providers/agent/HeartbeatMonitor.java b/slider-core/src/main/java/org/apache/slider/providers/agent/HeartbeatMonitor.java
index 3aeff66..0a1beca 100644
--- a/slider-core/src/main/java/org/apache/slider/providers/agent/HeartbeatMonitor.java
+++ b/slider-core/src/main/java/org/apache/slider/providers/agent/HeartbeatMonitor.java
@@ -17,6 +17,8 @@
*/
package org.apache.slider.providers.agent;
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.hadoop.yarn.api.records.ContainerId;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -53,10 +55,7 @@
}
public boolean isAlive() {
- if (monitorThread != null) {
- return monitorThread.isAlive();
- }
- return false;
+ return monitorThread != null && monitorThread.isAlive();
}
@Override
@@ -66,7 +65,7 @@
log.debug("Putting monitor to sleep for " + threadWakeupInterval + " " +
"milliseconds");
Thread.sleep(threadWakeupInterval);
- doWork();
+ doWork(System.currentTimeMillis());
} catch (InterruptedException ex) {
log.warn("Scheduler thread is interrupted going to stop", ex);
shouldRun = false;
@@ -83,32 +82,46 @@
* received in last check interval they are marked as UNHEALTHY. INIT is when the agent is started but it did not
* communicate at all. HEALTHY being the AM has received heartbeats. After an interval as UNHEALTHY the container is
* declared unavailable
+ * @param now current time in milliseconds ... tests can set this explicitly
*/
- private void doWork() {
+ @VisibleForTesting
+ public void doWork(long now) {
Map<String, ComponentInstanceState> componentStatuses = provider.getComponentStatuses();
if (componentStatuses != null) {
for (String containerLabel : componentStatuses.keySet()) {
ComponentInstanceState componentInstanceState = componentStatuses.get(containerLabel);
- long timeSinceLastHeartbeat = System.currentTimeMillis() - componentInstanceState.getLastHeartbeat();
+ long timeSinceLastHeartbeat = now - componentInstanceState.getLastHeartbeat();
if (timeSinceLastHeartbeat > threadWakeupInterval) {
- if (componentInstanceState.getContainerState() == ContainerState.HEALTHY ||
- componentInstanceState.getContainerState() == ContainerState.INIT) {
- componentInstanceState.setContainerState(ContainerState.UNHEALTHY);
- log.warn("Component {} marked UNHEALTHY. Last heartbeat received at {} approx. {} ms. back.",
- containerLabel, componentInstanceState.getLastHeartbeat(),
- timeSinceLastHeartbeat);
- continue;
+ switch (componentInstanceState.getContainerState()) {
+ case INIT:
+ case HEALTHY:
+ componentInstanceState.setContainerState(ContainerState.UNHEALTHY);
+ log.warn(
+ "Component {} marked UNHEALTHY. Last heartbeat received at {} approx. {} ms. back.",
+ componentInstanceState,
+ componentInstanceState.getLastHeartbeat(),
+ timeSinceLastHeartbeat);
+ break;
+ case UNHEALTHY:
+ if (timeSinceLastHeartbeat > threadWakeupInterval * 2) {
+ componentInstanceState.setContainerState(
+ ContainerState.HEARTBEAT_LOST);
+ log.warn(
+ "Component {} marked HEARTBEAT_LOST. Last heartbeat received at {} approx. {} ms. back.",
+ componentInstanceState, componentInstanceState.getLastHeartbeat(),
+ timeSinceLastHeartbeat);
+ ContainerId containerId =
+ componentInstanceState.getContainerId();
+ provider.lostContainer(containerLabel, containerId);
+ }
+ break;
+ case HEARTBEAT_LOST:
+ // unexpected case
+ log.warn("Heartbeat from lost component: {}", componentInstanceState);
+ break;
}
- if (componentInstanceState.getContainerState() == ContainerState.UNHEALTHY
- && timeSinceLastHeartbeat > threadWakeupInterval * 2) {
- componentInstanceState.setContainerState(ContainerState.HEARTBEAT_LOST);
- log.warn("Component {} marked HEARTBEAT_LOST. Last heartbeat received at {} approx. {} ms. back.",
- containerLabel, componentInstanceState.getLastHeartbeat(),
- timeSinceLastHeartbeat);
- this.provider.releaseContainer(containerLabel);
- continue;
- }
+
}
}
}
diff --git a/slider-core/src/main/java/org/apache/slider/providers/agent/application/metadata/Application.java b/slider-core/src/main/java/org/apache/slider/providers/agent/application/metadata/Application.java
index b007313..d994e33 100644
--- a/slider-core/src/main/java/org/apache/slider/providers/agent/application/metadata/Application.java
+++ b/slider-core/src/main/java/org/apache/slider/providers/agent/application/metadata/Application.java
@@ -26,6 +26,7 @@
String name;
String comment;
String version;
+ String exportedConfigs;
List<Component> components;
List<ExportGroup> exportGroups;
List<OSSpecific> osSpecifics;
@@ -33,10 +34,10 @@
ConfigurationDependencies configDependencies;
public Application() {
- exportGroups = new ArrayList<>();
- components = new ArrayList<>();
- osSpecifics = new ArrayList<>();
- commandOrders = new ArrayList<>();
+ exportGroups = new ArrayList<ExportGroup>();
+ components = new ArrayList<Component>();
+ osSpecifics = new ArrayList<OSSpecific>();
+ commandOrders = new ArrayList<CommandOrder>();
}
public String getName() {
@@ -63,6 +64,14 @@
this.version = version;
}
+ public String getExportedConfigs() {
+ return exportedConfigs;
+ }
+
+ public void setExportedConfigs(String exportedConfigs) {
+ this.exportedConfigs = exportedConfigs;
+ }
+
public ConfigurationDependencies getConfigDependencies() {
return configDependencies;
}
diff --git a/slider-core/src/main/java/org/apache/slider/providers/agent/application/metadata/Component.java b/slider-core/src/main/java/org/apache/slider/providers/agent/application/metadata/Component.java
index 6cd08e0..03c64d4 100644
--- a/slider-core/src/main/java/org/apache/slider/providers/agent/application/metadata/Component.java
+++ b/slider-core/src/main/java/org/apache/slider/providers/agent/application/metadata/Component.java
@@ -16,6 +16,9 @@
*/
package org.apache.slider.providers.agent.application.metadata;
+import java.util.ArrayList;
+import java.util.List;
+
/**
*
*/
@@ -25,10 +28,14 @@
String publishConfig;
String minInstanceCount;
String maxInstanceCount;
+ String autoStartOnFailure;
+ String appExports;
CommandScript commandScript;
+ List<ComponentExport> componentExports;
public Component() {
publishConfig = Boolean.FALSE.toString();
+ componentExports = new ArrayList<ComponentExport>();
}
public String getName() {
@@ -55,6 +62,22 @@
this.publishConfig = publishConfig;
}
+ public String getAutoStartOnFailure() {
+ return autoStartOnFailure;
+ }
+
+ public void setAutoStartOnFailure(String autoStartOnFailure) {
+ this.autoStartOnFailure = autoStartOnFailure;
+ }
+
+ public String getAppExports() {
+ return appExports;
+ }
+
+ public void setAppExports(String appExports) {
+ this.appExports = appExports;
+ }
+
public String getMinInstanceCount() {
return minInstanceCount;
}
@@ -79,6 +102,18 @@
this.commandScript = commandScript;
}
+ public void addComponentExport(ComponentExport export) {
+ componentExports.add(export);
+ }
+
+ public List<ComponentExport> getComponentExports() {
+ return componentExports;
+ }
+
+ public Boolean getRequiresAutoRestart() {
+ return Boolean.parseBoolean(this.autoStartOnFailure);
+ }
+
@Override
public String toString() {
final StringBuilder sb =
diff --git a/slider-core/src/main/java/org/apache/slider/providers/agent/application/metadata/ComponentExport.java b/slider-core/src/main/java/org/apache/slider/providers/agent/application/metadata/ComponentExport.java
new file mode 100644
index 0000000..a18854c
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/providers/agent/application/metadata/ComponentExport.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.slider.providers.agent.application.metadata;
+
+/**
+ * A single name/value export entry declared by a component in metainfo.xml.
+ */
+public class ComponentExport {
+ String name;
+ String value;
+
+ public ComponentExport() {
+ }
+
+ public String getName() {
+ return name;
+ }
+
+ public void setName(String name) {
+ this.name = name;
+ }
+
+ public String getValue() {
+ return value;
+ }
+
+ public void setValue(String value) {
+ this.value = value;
+ }
+
+ @Override
+ public String toString() {
+ final StringBuilder sb =
+ new StringBuilder("{");
+ sb.append(",\n\"name\": ").append(name);
+ sb.append(",\n\"value\": ").append(value);
+ sb.append('}');
+ return sb.toString();
+ }
+}
diff --git a/slider-core/src/main/java/org/apache/slider/providers/agent/application/metadata/MetainfoParser.java b/slider-core/src/main/java/org/apache/slider/providers/agent/application/metadata/MetainfoParser.java
index c7922a7..bc93d6f 100644
--- a/slider-core/src/main/java/org/apache/slider/providers/agent/application/metadata/MetainfoParser.java
+++ b/slider-core/src/main/java/org/apache/slider/providers/agent/application/metadata/MetainfoParser.java
@@ -38,6 +38,7 @@
digester.addBeanPropertySetter("*/application/name");
digester.addBeanPropertySetter("*/application/comment");
digester.addBeanPropertySetter("*/application/version");
+ digester.addBeanPropertySetter("*/application/exportedConfigs");
digester.addObjectCreate("*/commandOrder", CommandOrder.class);
digester.addBeanPropertySetter("*/commandOrder/command");
@@ -58,6 +59,12 @@
digester.addBeanPropertySetter("*/component/publishConfig");
digester.addBeanPropertySetter("*/component/minInstanceCount");
digester.addBeanPropertySetter("*/component/maxInstanceCount");
+ digester.addBeanPropertySetter("*/component/autoStartOnFailure");
+ digester.addBeanPropertySetter("*/component/appExports");
+ digester.addObjectCreate("*/componentExport", ComponentExport.class);
+ digester.addBeanPropertySetter("*/componentExport/name");
+ digester.addBeanPropertySetter("*/componentExport/value");
+ digester.addSetNext("*/componentExport", "addComponentExport");
digester.addSetNext("*/component", "addComponent");
digester.addObjectCreate("*/commandScript", CommandScript.class);
diff --git a/slider-core/src/main/java/org/apache/slider/providers/slideram/SliderAMClientProvider.java b/slider-core/src/main/java/org/apache/slider/providers/slideram/SliderAMClientProvider.java
index 6aeb801..dc84f02 100644
--- a/slider-core/src/main/java/org/apache/slider/providers/slideram/SliderAMClientProvider.java
+++ b/slider-core/src/main/java/org/apache/slider/providers/slideram/SliderAMClientProvider.java
@@ -19,6 +19,7 @@
package org.apache.slider.providers.slideram;
import com.beust.jcommander.JCommander;
+import com.codahale.metrics.MetricRegistry;
import com.google.gson.GsonBuilder;
import org.apache.curator.CuratorZookeeperClient;
import org.apache.curator.framework.CuratorFramework;
@@ -28,7 +29,7 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.Resource;
-import org.apache.slider.api.OptionKeys;
+import org.apache.slider.api.InternalKeys;
import org.apache.slider.api.ResourceKeys;
import org.apache.slider.api.RoleKeys;
import org.apache.slider.common.SliderKeys;
@@ -59,8 +60,8 @@
* This keeps aspects of role, cluster validation and Clusterspec setup
* out of the core slider client
*/
-public class SliderAMClientProvider extends AbstractClientProvider implements
- SliderKeys {
+public class SliderAMClientProvider extends AbstractClientProvider
+ implements SliderKeys {
protected static final Logger log =
@@ -83,7 +84,7 @@
* List of roles
*/
public static final List<ProviderRole> ROLES =
- new ArrayList<>();
+ new ArrayList<ProviderRole>();
public static final int KEY_AM = ROLE_AM_PRIORITY_INDEX;
@@ -113,9 +114,8 @@
AggregateConf instanceDefinition,
Path clusterDirPath,
Path generatedConfDirPath,
- boolean secure) throws
- SliderException,
- IOException {
+ boolean secure)
+ throws SliderException, IOException {
super.preflightValidateClusterConfiguration(sliderFileSystem, clustername, configuration, instanceDefinition, clusterDirPath, generatedConfDirPath, secure);
//add a check for the directory being writeable by the current user
@@ -123,7 +123,7 @@
dataPath = instanceDefinition.getInternalOperations()
.getGlobalOptions()
.getMandatoryOption(
- OptionKeys.INTERNAL_DATA_DIR_PATH);
+ InternalKeys.INTERNAL_DATA_DIR_PATH);
Path path = new Path(dataPath);
sliderFileSystem.verifyDirectoryWriteAccess(path);
@@ -147,7 +147,7 @@
throws IOException, SliderException {
Map<String, LocalResource> providerResources =
- new HashMap<>();
+ new HashMap<String, LocalResource>();
ProviderUtils.addProviderJar(providerResources,
@@ -167,7 +167,7 @@
CuratorZookeeperClient.class,
ServiceInstance.class,
ServiceNames.class,
-
+ MetricRegistry.class
};
String[] jars =
{
@@ -179,6 +179,7 @@
"curator-client.jar",
"curator-x-discovery.jar",
"curator-x-discovery-service.jar",
+ "metrics-core.jar"
};
ProviderUtils.addDependencyJars(providerResources, fileSystem, tempPath,
libdir, jars,
@@ -209,11 +210,11 @@
* add them to the command line
*/
public void addJVMOptions(AggregateConf aggregateConf,
- JavaCommandLineBuilder cmdLine) throws
- BadConfigException {
-
+ JavaCommandLineBuilder cmdLine)
+ throws BadConfigException {
+
MapOperations sliderAM =
- aggregateConf.getAppConfOperations().getMandatoryComponent(
+ aggregateConf.getAppConfOperations().getMandatoryComponent(
SliderKeys.COMPONENT_AM);
cmdLine.forceIPv4().headless();
String heap = sliderAM.getOption(RoleKeys.JVM_HEAP,
@@ -227,11 +228,10 @@
@Override
- public void prepareInstanceConfiguration(AggregateConf aggregateConf) throws
- SliderException,
- IOException {
+ public void prepareInstanceConfiguration(AggregateConf aggregateConf)
+ throws SliderException, IOException {
mergeTemplates(aggregateConf,
- INTERNAL_JSON, RESOURCES_JSON, APPCONF_JSON
+ INTERNAL_JSON, RESOURCES_JSON, APPCONF_JSON
);
}
}
diff --git a/slider-core/src/main/java/org/apache/slider/providers/slideram/SliderAMProviderService.java b/slider-core/src/main/java/org/apache/slider/providers/slideram/SliderAMProviderService.java
index 184c25a..071fc19 100644
--- a/slider-core/src/main/java/org/apache/slider/providers/slideram/SliderAMProviderService.java
+++ b/slider-core/src/main/java/org/apache/slider/providers/slideram/SliderAMProviderService.java
@@ -100,7 +100,7 @@
@Override
public List<ProviderRole> getRoles() {
- return new ArrayList<>(0);
+ return new ArrayList<ProviderRole>(0);
}
@Override
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/ContainerStartOperation.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/ContainerStartOperation.java
deleted file mode 100644
index 50c99f3..0000000
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/ContainerStartOperation.java
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.slider.server.appmaster;
-
-import org.apache.hadoop.yarn.api.records.Container;
-import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
-import org.apache.slider.server.appmaster.state.RoleInstance;
-
-/**
- * Callback for container start requests
- */
-public interface ContainerStartOperation {
- /**
- * Add a node to the list of starting
- * nodes then trigger the NM start operation with the given
- * launch context
- * @param container container
- * @param ctx context
- * @param instance node details
- */
- void startContainer(Container container,
- ContainerLaunchContext ctx,
- RoleInstance instance) ;
-}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/RoleLaunchService.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/RoleLaunchService.java
index 5a5baaa..e8b6802 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/RoleLaunchService.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/RoleLaunchService.java
@@ -28,32 +28,42 @@
import org.apache.slider.core.launch.ContainerLauncher;
import org.apache.slider.providers.ProviderRole;
import org.apache.slider.providers.ProviderService;
+import org.apache.slider.server.appmaster.actions.ActionStartContainer;
+import org.apache.slider.server.appmaster.actions.AsyncAction;
+import org.apache.slider.server.appmaster.actions.QueueAccess;
import org.apache.slider.server.appmaster.state.RoleInstance;
import org.apache.slider.server.appmaster.state.RoleStatus;
-import org.apache.slider.server.services.workflow.AbstractWorkflowExecutorService;
+import org.apache.slider.server.services.workflow.WorkflowExecutorService;
import org.apache.slider.server.services.workflow.ServiceThreadFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Map;
+import java.util.Queue;
+import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
/**
* A service for launching containers
*/
-public class RoleLaunchService extends AbstractWorkflowExecutorService {
+public class RoleLaunchService
+ extends WorkflowExecutorService<ExecutorService> {
protected static final Logger log =
LoggerFactory.getLogger(RoleLaunchService.class);
public static final String ROLE_LAUNCH_SERVICE = "RoleLaunchService";
- /**
- * Callback to whatever has the task of actually running the container
- * start operation
- */
- private final ContainerStartOperation containerStarter;
+ /**
+ * Queue submission API
+ */
+ private final QueueAccess actionQueue;
+
+ /**
+ * Provider building up the command
+ */
private final ProviderService provider;
+
/**
* Filesystem to use for the launch
*/
@@ -75,20 +85,21 @@
/**
* Construct an instance of the launcher
* @param startOperation the callback to start the opreation
+ * @param queueAccess access to the action queue used to schedule container start operations
* @param provider the provider
* @param fs filesystem
* @param generatedConfDirPath path in the FS for the generated dir
* @param envVars environment variables
* @param launcherTmpDirPath path for a temporary data in the launch process
*/
- public RoleLaunchService(ContainerStartOperation startOperation,
- ProviderService provider,
- SliderFileSystem fs,
- Path generatedConfDirPath,
- Map<String, String> envVars,
+ public RoleLaunchService(QueueAccess queueAccess,
+ ProviderService provider,
+ SliderFileSystem fs,
+ Path generatedConfDirPath,
+ Map<String, String> envVars,
Path launcherTmpDirPath) {
super(ROLE_LAUNCH_SERVICE);
- containerStarter = startOperation;
+ this.actionQueue = queueAccess;
this.fs = fs;
this.generatedConfDirPath = generatedConfDirPath;
this.launcherTmpDirPath = launcherTmpDirPath;
@@ -203,9 +214,10 @@
instance.role = containerRole;
instance.roleId = role.id;
instance.environment = envDescription;
- containerStarter.startContainer(container,
- containerLauncher.completeContainerLaunch(),
- instance);
+ actionQueue.put(new ActionStartContainer("starting " + containerRole,
+ 0, container,
+ containerLauncher.completeContainerLaunch(),
+ instance));
} catch (Exception e) {
log.error("Exception thrown while trying to start {}: {}",
containerRole, e);
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
index 0b22910..93adfb2 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
@@ -18,6 +18,8 @@
package org.apache.slider.server.appmaster;
+import com.codahale.metrics.MetricRegistry;
+import com.google.common.annotations.VisibleForTesting;
import com.google.protobuf.BlockingService;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
@@ -55,7 +57,7 @@
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.hadoop.yarn.webapp.WebApps;
import org.apache.slider.api.ClusterDescription;
-import org.apache.slider.api.OptionKeys;
+import org.apache.slider.api.InternalKeys;
import org.apache.slider.api.ResourceKeys;
import org.apache.slider.api.RoleKeys;
import org.apache.slider.api.SliderClusterProtocol;
@@ -75,12 +77,13 @@
import org.apache.slider.core.build.InstanceIO;
import org.apache.slider.core.conf.AggregateConf;
import org.apache.slider.core.conf.ConfTree;
+import org.apache.slider.core.conf.ConfTreeOperations;
import org.apache.slider.core.conf.MapOperations;
import org.apache.slider.core.exceptions.BadConfigException;
import org.apache.slider.core.exceptions.SliderException;
import org.apache.slider.core.exceptions.SliderInternalStateException;
import org.apache.slider.core.exceptions.TriggerClusterTeardownException;
-import org.apache.slider.core.main.LauncherExitCodes;
+import org.apache.slider.core.main.ExitCodeProvider;
import org.apache.slider.core.main.RunService;
import org.apache.slider.core.main.ServiceLauncher;
import org.apache.slider.core.persist.ConfTreeSerDeser;
@@ -94,17 +97,32 @@
import org.apache.slider.providers.SliderProviderFactory;
import org.apache.slider.providers.slideram.SliderAMClientProvider;
import org.apache.slider.providers.slideram.SliderAMProviderService;
+import org.apache.slider.server.appmaster.actions.ActionKillContainer;
+import org.apache.slider.server.appmaster.actions.RegisterComponentInstance;
+import org.apache.slider.server.appmaster.actions.QueueExecutor;
+import org.apache.slider.server.appmaster.actions.ActionHalt;
+import org.apache.slider.server.appmaster.actions.QueueService;
+import org.apache.slider.server.appmaster.actions.ActionStopSlider;
+import org.apache.slider.server.appmaster.actions.AsyncAction;
+import org.apache.slider.server.appmaster.actions.RenewingAction;
+import org.apache.slider.server.appmaster.actions.ResetFailureWindow;
+import org.apache.slider.server.appmaster.actions.UnregisterComponentInstance;
+import org.apache.slider.server.appmaster.monkey.ChaosKillAM;
+import org.apache.slider.server.appmaster.monkey.ChaosKillContainer;
+import org.apache.slider.server.appmaster.monkey.ChaosMonkeyService;
+import org.apache.slider.server.appmaster.operations.AsyncRMOperationHandler;
+import org.apache.slider.server.appmaster.operations.ProviderNotifyingOperationHandler;
import org.apache.slider.server.appmaster.rpc.RpcBinder;
import org.apache.slider.server.appmaster.rpc.SliderAMPolicyProvider;
import org.apache.slider.server.appmaster.rpc.SliderClusterProtocolPBImpl;
-import org.apache.slider.server.appmaster.state.AbstractRMOperation;
+import org.apache.slider.server.appmaster.operations.AbstractRMOperation;
import org.apache.slider.server.appmaster.state.AppState;
import org.apache.slider.server.appmaster.state.ContainerAssignment;
-import org.apache.slider.server.appmaster.state.ContainerReleaseOperation;
import org.apache.slider.server.appmaster.state.ProviderAppState;
-import org.apache.slider.server.appmaster.state.RMOperationHandler;
+import org.apache.slider.server.appmaster.operations.RMOperationHandler;
import org.apache.slider.server.appmaster.state.RoleInstance;
import org.apache.slider.server.appmaster.state.RoleStatus;
+import org.apache.slider.server.appmaster.state.SimpleReleaseSelector;
import org.apache.slider.server.appmaster.web.AgentService;
import org.apache.slider.server.appmaster.web.rest.agent.AgentWebApp;
import org.apache.slider.server.appmaster.web.SliderAMWebApp;
@@ -117,6 +135,8 @@
import org.apache.slider.server.services.security.CertificateManager;
import org.apache.slider.server.services.utility.AbstractSliderLaunchedService;
import org.apache.slider.server.services.utility.WebAppService;
+import org.apache.slider.server.services.workflow.ServiceThreadFactory;
+import org.apache.slider.server.services.workflow.WorkflowExecutorService;
import org.apache.slider.server.services.workflow.WorkflowRpcService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -132,9 +152,11 @@
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
-import java.util.LinkedList;
import java.util.List;
import java.util.Map;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.ReentrantLock;
@@ -154,9 +176,7 @@
SliderClusterProtocol,
ServiceStateChangeListener,
RoleKeys,
- ProviderCompleted,
- ContainerStartOperation,
- AMViewForProviders {
+ ProviderCompleted {
protected static final Logger log =
LoggerFactory.getLogger(SliderAppMaster.class);
@@ -179,7 +199,13 @@
public static final int HEARTBEAT_INTERVAL = 1000;
public static final int NUM_RPC_HANDLERS = 5;
+ public static final int SCHEDULED_EXECUTOR_POOL_SIZE = 1;
+ /**
+ * Singleton of metrics registry
+ */
+ public static final MetricRegistry metrics = new MetricRegistry();
+
/** YARN RPC to communicate with the Resource Manager or Node Manager */
private YarnRPC yarnRPC;
@@ -190,6 +216,8 @@
@SuppressWarnings("FieldAccessedSynchronizedAndUnsynchronized")
private RMOperationHandler rmOperationHandler;
+
+ private RMOperationHandler providerRMOperationHandler;
/** Handle to communicate with the Node Manager*/
@SuppressWarnings("FieldAccessedSynchronizedAndUnsynchronized")
@@ -277,6 +305,11 @@
private ContainerId appMasterContainerID;
/**
+ * Monkey Service -may be null
+ */
+ private ChaosMonkeyService monkey;
+
+ /**
* ProviderService of this cluster
*/
@SuppressWarnings("FieldAccessedSynchronizedAndUnsynchronized")
@@ -293,7 +326,6 @@
*/
private int containerMaxCores;
-
/**
* limit container memory
*/
@@ -315,6 +347,10 @@
private String agentAccessUrl;
private CertificateManager certificateManager;
+ private WorkflowExecutorService<ExecutorService> executorService;
+
+ private final QueueService actionQueues = new QueueService();
+
/**
* Service Constructor
*/
@@ -322,9 +358,7 @@
super(SERVICE_CLASSNAME_SHORT);
}
-
-
- /* =================================================================== */
+/* =================================================================== */
/* service lifecycle methods */
/* =================================================================== */
@@ -340,7 +374,7 @@
SliderAMCreateAction createAction = (SliderAMCreateAction) action;
//sort out the location of the AM
serviceArgs.applyDefinitions(conf);
- serviceArgs.applyFileSystemURL(conf);
+ serviceArgs.applyFileSystemBinding(conf);
String rmAddress = createAction.getRmAddress();
if (rmAddress != null) {
@@ -348,7 +382,7 @@
SliderUtils.setRmSchedulerAddress(conf, rmAddress);
}
serviceArgs.applyDefinitions(conf);
- serviceArgs.applyFileSystemURL(conf);
+ serviceArgs.applyFileSystemBinding(conf);
//init security with our conf
if (SliderUtils.isHadoopClusterSecure(conf)) {
log.info("Secure mode with kerberos realm {}",
@@ -368,11 +402,25 @@
//look at settings of Hadoop Auth, to pick up a problem seen once
checkAndWarnForAuthTokenProblems();
+ executorService = new WorkflowExecutorService<ExecutorService>("AmExecutor",
+ Executors.newCachedThreadPool(
+ new ServiceThreadFactory("AmExecutor", true)));
+ addService(executorService);
+
+
+ addService(actionQueues);
//init all child services
super.serviceInit(conf);
}
+
+ @Override
+ protected void serviceStart() throws Exception {
+ super.serviceStart();
+ executorService.execute(new QueueExecutor(this, actionQueues));
+ executorService.execute(actionQueues);
+ }
-/* =================================================================== */
+ /* =================================================================== */
/* RunService methods called from ServiceLauncher */
/* =================================================================== */
@@ -412,7 +460,8 @@
String action = serviceArgs.getAction();
List<String> actionArgs = serviceArgs.getActionArgs();
int exitCode;
- switch (action) {
+/* JDK7
+ switch (action) {
case SliderActions.ACTION_HELP:
log.info(getName() + serviceArgs.usage());
exitCode = LauncherExitCodes.EXIT_USAGE;
@@ -423,6 +472,15 @@
default:
throw new SliderException("Unimplemented: " + action);
}
+ */
+ if (action.equals(SliderActions.ACTION_HELP)) {
+ log.info(getName() + serviceArgs.usage());
+ exitCode = SliderExitCodes.EXIT_USAGE;
+ } else if (action.equals(SliderActions.ACTION_CREATE)) {
+ exitCode = createAndRunCluster(actionArgs.get(0));
+ } else {
+ throw new SliderException("Unimplemented: " + action);
+ }
log.info("Exiting AM; final exit code = {}", exitCode);
return exitCode;
}
@@ -483,10 +541,9 @@
serviceConf.set(SliderAmIpFilter.WS_CONTEXT_ROOT, WS_CONTEXT_ROOT + "|" + WS_AGENT_CONTEXT_ROOT);
//get our provider
- MapOperations globalInternalOptions =
- instanceDefinition.getInternalOperations().getGlobalOptions();
+ MapOperations globalInternalOptions = getGlobalInternalOptions();
String providerType = globalInternalOptions.getMandatoryOption(
- OptionKeys.INTERNAL_PROVIDER_NAME);
+ InternalKeys.INTERNAL_PROVIDER_NAME);
log.info("Cluster provider type is {}", providerType);
SliderProviderFactory factory =
SliderProviderFactory.createSliderProviderFactory(
@@ -494,6 +551,9 @@
providerService = factory.createServerProvider();
// init the provider BUT DO NOT START IT YET
initAndAddService(providerService);
+ providerRMOperationHandler =
+ new ProviderNotifyingOperationHandler(providerService);
+
// create a slider AM provider
sliderAMProvider = new SliderAMProviderService();
initAndAddService(sliderAMProvider);
@@ -553,7 +613,7 @@
}
Map<String, String> envVars;
-
+ List<Container> liveContainers;
/**
* It is critical this section is synchronized, to stop async AM events
* arriving while registering a restarting AM.
@@ -593,7 +653,7 @@
//build the role map
List<ProviderRole> providerRoles =
- new ArrayList<>(providerService.getRoles());
+ new ArrayList<ProviderRole>(providerService.getRoles());
providerRoles.addAll(SliderAMClientProvider.ROLES);
// Start up the WebApp and track the URL for it
@@ -615,7 +675,7 @@
.start(webApp);
appMasterTrackingUrl = "http://" + appMasterHostname + ":" + webApp.port();
WebAppService<SliderAMWebApp> webAppService =
- new WebAppService<>("slider", webApp);
+ new WebAppService<SliderAMWebApp>("slider", webApp);
webAppService.init(serviceConf);
webAppService.start();
@@ -655,13 +715,16 @@
}
// extract container list
- List<Container> liveContainers =
- response.getContainersFromPreviousAttempts();
+
+ liveContainers = response.getContainersFromPreviousAttempts();
//now validate the installation
Configuration providerConf =
providerService.loadProviderConfigurationInformation(confDir);
+ providerService
+ .initializeApplicationConfiguration(instanceDefinition, fs);
+
providerService.validateApplicationConfiguration(instanceDefinition,
confDir,
securityEnabled);
@@ -671,12 +734,17 @@
//build the instance
appState.buildInstance(instanceDefinition,
- providerConf,
- providerRoles,
- fs.getFileSystem(),
- historyDir,
- liveContainers,
- appInformation);
+ serviceConf,
+ providerConf,
+ providerRoles,
+ fs.getFileSystem(),
+ historyDir,
+ liveContainers,
+ appInformation,
+ new SimpleReleaseSelector());
+
+ providerService.rebuildContainerDetails(liveContainers,
+ instanceDefinition.getName(), appState.getRolePriorityMap());
// add the AM to the list of nodes in the cluster
@@ -687,21 +755,21 @@
// build up environment variables that the AM wants set in every container
// irrespective of provider and role.
- envVars = new HashMap<>();
+ envVars = new HashMap<String, String>();
if (hadoop_user_name != null) {
envVars.put(HADOOP_USER_NAME, hadoop_user_name);
}
}
String rolesTmpSubdir = appMasterContainerID.toString() + "/roles";
- String amTmpDir = globalInternalOptions.getMandatoryOption(OptionKeys.INTERNAL_AM_TMP_DIR);
+ String amTmpDir = globalInternalOptions.getMandatoryOption(InternalKeys.INTERNAL_AM_TMP_DIR);
Path tmpDirPath = new Path(amTmpDir);
Path launcherTmpDirPath = new Path(tmpDirPath, rolesTmpSubdir);
fs.getFileSystem().mkdirs(launcherTmpDirPath);
//launcher service
- launchService = new RoleLaunchService(this,
+ launchService = new RoleLaunchService(actionQueues,
providerService,
fs,
new Path(getGeneratedConfDir()),
@@ -714,21 +782,24 @@
//Give the provider restricted access to the state, registry
- providerService.bind(stateForProviders, registry, this);
- sliderAMProvider.bind(stateForProviders, registry, null);
+ providerService.bind(stateForProviders, registry, actionQueues,
+ liveContainers);
+ sliderAMProvider.bind(stateForProviders, registry, actionQueues,
+ liveContainers);
// now do the registration
registerServiceInstance(clustername, appid);
+ // chaos monkey
+ maybeStartMonkey();
+
+ // Start the Slider AM provider
sliderAMProvider.start();
-
-
- // launch the provider; this is expected to trigger a callback that
+ // launch the real provider; this is expected to trigger a callback that
// starts the node review process
launchProviderService(instanceDefinition, confDir);
-
try {
//now block waiting to be told to exit the process
waitForAMCompletionSignal();
@@ -742,7 +813,12 @@
private void startAgentWebApp(MapOperations appInformation,
Configuration serviceConf) {
- LOG_YARN.info("AM classpath:" + ((URLClassLoader) AgentWebApp.class.getClassLoader() ).getURLs());
+ URL[] urls = ((URLClassLoader) AgentWebApp.class.getClassLoader() ).getURLs();
+ StringBuilder sb = new StringBuilder("AM classpath:");
+ for (URL url : urls) {
+ sb.append("\n").append(url.toString());
+ }
+ LOG_YARN.info(sb.append("\n").toString());
// Start up the agent web app and track the URL for it
AgentWebApp agentWebApp = AgentWebApp.$for(AgentWebApp.BASE_PATH,
new WebAppApiImpl(this,
@@ -826,6 +902,31 @@
}
/**
+ * Register/re-register a component (that is already in the app state)
+ * @param id the component
+ */
+ public boolean registerComponent(ContainerId id) {
+ RoleInstance instance = appState.getOwnedContainer(id);
+ if (instance == null) {
+ return false;
+ }
+ // this is where component registrations will go
+ log.info("Registering component {}", id);
+
+ return true;
+ }
+
+ /**
+ * unregister a component. At the time this message is received,
+ * the component may already been deleted from/never added to
+ * the app state
+ * @param id the component
+ */
+ public void unregisterComponent(ContainerId id) {
+ log.info("Unregistering component {}", id);
+ }
+
+ /**
* looks for a specific case where a token file is provided as an environment
* variable, yet the file is not there.
*
@@ -861,9 +962,18 @@
* @return the generated configuration dir
*/
public String getGeneratedConfDir() {
+ return getGlobalInternalOptions().get(
+ InternalKeys.INTERNAL_GENERATED_CONF_PATH);
+ }
+
+ /**
+ * Get the global internal options for the AM
+ * @return a map to access the internals
+ */
+ public MapOperations getGlobalInternalOptions() {
return getInstanceDefinition()
.getInternalOperations().
- getGlobalOptions().get(OptionKeys.INTERNAL_GENERATED_CONF_PATH);
+ getGlobalOptions();
}
/**
@@ -875,6 +985,22 @@
}
/**
+ * Get the AM log
+ * @return the log of the AM
+ */
+ public static Logger getLog() {
+ return log;
+ }
+
+ /**
+ * Get the application state
+ * @return the application state
+ */
+ public AppState getAppState() {
+ return appState;
+ }
+
+ /**
* Block until it is signalled that the AM is done
*/
private void waitForAMCompletionSignal() {
@@ -937,7 +1063,6 @@
//stop any launches in progress
launchService.stop();
-
//now release all containers
releaseAllContainers();
@@ -945,8 +1070,8 @@
// signal to the RM
log.info("Application completed. Signalling finish to RM");
-
//if there were failed containers and the app isn't already down as failing, it is now
+/*
int failedContainerCount = appState.getFailedCountainerCount();
if (failedContainerCount != 0 &&
appStatus == FinalApplicationStatus.SUCCEEDED) {
@@ -955,12 +1080,20 @@
"Completed with exit code = " + exitCode + " - " + getContainerDiagnosticInfo();
success = false;
}
+*/
try {
log.info("Unregistering AM status={} message={}", appStatus, appMessage);
asyncRMClient.unregisterApplicationMaster(appStatus, appMessage, null);
+/* JDK7
} catch (YarnException | IOException e) {
log.info("Failed to unregister application: " + e, e);
}
+*/
+ } catch (IOException e) {
+ log.info("Failed to unregister application: " + e, e);
+ } catch (YarnException e) {
+ log.info("Failed to unregister application: " + e, e);
+ }
}
/**
@@ -1016,8 +1149,8 @@
@Override //AMRMClientAsync
public void onContainersAllocated(List<Container> allocatedContainers) {
LOG_YARN.info("onContainersAllocated({})", allocatedContainers.size());
- List<ContainerAssignment> assignments = new ArrayList<>();
- List<AbstractRMOperation> operations = new ArrayList<>();
+ List<ContainerAssignment> assignments = new ArrayList<ContainerAssignment>();
+ List<AbstractRMOperation> operations = new ArrayList<AbstractRMOperation>();
//app state makes all the decisions
appState.onContainersAllocated(allocatedContainers, assignments, operations);
@@ -1030,7 +1163,7 @@
}
//for all the operations, exec them
- rmOperationHandler.execute(operations);
+ executeRMOperations(operations);
log.info("Diagnostics: " + getContainerDiagnosticInfo());
}
@@ -1050,20 +1183,15 @@
// non complete containers should not be here
assert (status.getState() == ContainerState.COMPLETE);
- AppState.NodeCompletionResult result = appState.onCompletedNode(
- getConfig(), status);
+ AppState.NodeCompletionResult result = appState.onCompletedNode(status);
if (result.containerFailed) {
RoleInstance ri = result.roleInstance;
log.error("Role instance {} failed ", ri);
}
- }
- // ask for more containers if any failed
- // In the case of Slider, we don't expect containers to complete since
- // Slider is a long running application. Keep track of how many containers
- // are completing. If too many complete, abort the application
- // TODO: this needs to be better thought about (and maybe something to
- // better handle in Yarn for long running apps)
+ getProviderService().notifyContainerCompleted(containerId);
+ queue(new UnregisterComponentInstance(containerId, 0, TimeUnit.MILLISECONDS));
+ }
try {
reviewRequestAndReleaseNodes();
@@ -1076,19 +1204,52 @@
* Implementation of cluster flexing.
* It should be the only way that anything -even the AM itself on startup-
* asks for nodes.
+ * @param resources the resource tree
* @return true if the any requests were made
* @throws IOException
*/
- private boolean flexCluster(ConfTree updated)
+ private boolean flexCluster(ConfTree resources)
throws IOException, SliderInternalStateException, BadConfigException {
- appState.updateResourceDefinitions(updated);
+ appState.updateResourceDefinitions(resources);
+
+ // reset the scheduled windows...the values
+ // may have changed
+ appState.resetFailureCounts();
+
+
// ask for more containers if needed
return reviewRequestAndReleaseNodes();
}
/**
+ * Schedule the failure window
+ * @param resources the resource tree
+ * @throws BadConfigException if the window is out of range
+ */
+ private void scheduleFailureWindowResets(ConfTree resources) throws
+ BadConfigException {
+ ResetFailureWindow reset = new ResetFailureWindow();
+ ConfTreeOperations ops = new ConfTreeOperations(resources);
+ MapOperations globals = ops.getGlobalOptions();
+ long seconds = globals.getTimeRange(ResourceKeys.CONTAINER_FAILURE_WINDOW,
+ ResourceKeys.DEFAULT_CONTAINER_FAILURE_WINDOW_DAYS,
+ ResourceKeys.DEFAULT_CONTAINER_FAILURE_WINDOW_HOURS,
+ ResourceKeys.DEFAULT_CONTAINER_FAILURE_WINDOW_MINUTES, 0);
+ if (seconds > 0) {
+ log.info(
+ "Scheduling the failure window reset interval to every {} seconds",
+ seconds);
+ RenewingAction<ResetFailureWindow> renew = new RenewingAction<ResetFailureWindow>(
+ reset, seconds, seconds, TimeUnit.SECONDS, 0);
+ actionQueues.renewing("failures", renew);
+ } else {
+ log.info("Failure window reset interval is not set");
+ }
+ }
+
+ /**
* Look at where the current node state is -and whether it should be changed
*/
private synchronized boolean reviewRequestAndReleaseNodes()
@@ -1100,8 +1261,10 @@
}
try {
List<AbstractRMOperation> allOperations = appState.reviewRequestAndReleaseNodes();
+ // tell the provider
+ providerRMOperationHandler.execute(allOperations);
//now apply the operations
- rmOperationHandler.execute(allOperations);
+ executeRMOperations(allOperations);
return !allOperations.isEmpty();
} catch (TriggerClusterTeardownException e) {
@@ -1117,7 +1280,7 @@
*/
private void releaseAllContainers() {
//now apply the operations
- rmOperationHandler.execute(appState.releaseAllContainers());
+ executeRMOperations(appState.releaseAllContainers());
}
/**
@@ -1152,7 +1315,8 @@
public void onError(Throwable e) {
//callback says it's time to finish
LOG_YARN.error("AMRMClientAsync.onError() received " + e, e);
- signalAMComplete(EXIT_EXCEPTION_THROWN, "AMRMClientAsync.onError() received " + e);
+ signalAMComplete(EXIT_EXCEPTION_THROWN,
+ "AMRMClientAsync.onError() received " + e);
}
/* =================================================================== */
@@ -1187,8 +1351,8 @@
YarnException {
SliderUtils.getCurrentUser();
String message = request.getMessage();
- log.info("SliderAppMasterApi.stopCluster: {}",message);
- signalAMComplete(EXIT_CLIENT_INITIATED_SHUTDOWN, message);
+ log.info("SliderAppMasterApi.stopCluster: {}", message);
+ schedule(new ActionStopSlider(message, 1000, TimeUnit.MILLISECONDS));
return Messages.StopClusterResponseProto.getDefaultInstance();
}
@@ -1200,8 +1364,8 @@
String payload = request.getClusterSpec();
ConfTreeSerDeser confTreeSerDeser = new ConfTreeSerDeser();
- ConfTree updated = confTreeSerDeser.fromJson(payload);
- boolean flexed = flexCluster(updated);
+ ConfTree updatedResources = confTreeSerDeser.fromJson(payload);
+ boolean flexed = flexCluster(updatedResources);
return Messages.FlexClusterResponseProto.newBuilder().setResponse(flexed).build();
}
@@ -1214,8 +1378,7 @@
String result;
//quick update
//query and json-ify
- ClusterDescription cd;
- cd = getCurrentClusterStatus();
+ ClusterDescription cd = updateClusterStatus();
result = cd.toJsonString();
String stat = result;
return Messages.GetJSONClusterStatusResponseProto.newBuilder()
@@ -1223,19 +1386,6 @@
.build();
}
- /**
- * Get the current cluster status, including any provider-specific info
- * @return a status document
- */
- public ClusterDescription getCurrentClusterStatus() {
- ClusterDescription cd;
- synchronized (this) {
- updateClusterStatus();
- cd = getClusterDescription();
- }
- return cd;
- }
-
@Override
public Messages.GetInstanceDefinitionResponseProto getInstanceDefinition(
@@ -1334,31 +1484,38 @@
//throws NoSuchNodeException if it is missing
RoleInstance instance =
appState.getLiveInstanceByContainerID(containerID);
- List<AbstractRMOperation> opsList =
- new LinkedList<>();
- ContainerReleaseOperation release =
- new ContainerReleaseOperation(instance.getId());
- opsList.add(release);
- //now apply the operations
- rmOperationHandler.execute(opsList);
+ queue(new ActionKillContainer(instance.getId(), 0, TimeUnit.MILLISECONDS,
+ rmOperationHandler));
Messages.KillContainerResponseProto.Builder builder =
Messages.KillContainerResponseProto.newBuilder();
builder.setSuccess(true);
return builder.build();
}
+ public void executeRMOperations(List<AbstractRMOperation> operations) {
+ rmOperationHandler.execute(operations);
+ }
+
+ /**
+ * Get the RM operations handler for direct scheduling of work.
+ */
+ @VisibleForTesting
+ public RMOperationHandler getRmOperationHandler() {
+ return rmOperationHandler;
+ }
+
@Override
- public Messages.AMSuicideResponseProto amSuicide(Messages.AMSuicideRequestProto request) throws
- IOException,
- YarnException {
+ public Messages.AMSuicideResponseProto amSuicide(
+ Messages.AMSuicideRequestProto request)
+ throws IOException, YarnException {
int signal = request.getSignal();
String text = request.getText();
int delay = request.getDelay();
log.info("AM Suicide with signal {}, message {} delay = {}", signal, text, delay);
- SliderUtils.haltAM(signal, text, delay);
- Messages.AMSuicideResponseProto.Builder builder =
- Messages.AMSuicideResponseProto.newBuilder();
- return builder.build();
+ ActionHalt action = new ActionHalt(signal, text, delay,
+ TimeUnit.MILLISECONDS);
+ schedule(action);
+ return Messages.AMSuicideResponseProto.getDefaultInstance();
}
/* =================================================================== */
@@ -1368,10 +1525,10 @@
/**
* Update the cluster description with anything interesting
*/
- public synchronized void updateClusterStatus() {
+ public synchronized ClusterDescription updateClusterStatus() {
Map<String, String> providerStatus = providerService.buildProviderStatus();
assert providerStatus != null : "null provider status";
- appState.refreshClusterStatus(providerStatus);
+ return appState.refreshClusterStatus(providerStatus);
}
/**
@@ -1385,7 +1542,7 @@
protected synchronized void launchProviderService(AggregateConf instanceDefinition,
File confDir)
throws IOException, SliderException {
- Map<String, String> env = new HashMap<>();
+ Map<String, String> env = new HashMap<String, String>();
boolean execStarted = providerService.exec(instanceDefinition, confDir, env, this);
if (execStarted) {
providerService.registerServiceListener(this);
@@ -1418,28 +1575,26 @@
}
}
-
- /* =================================================================== */
- /* ProviderAMOperations */
- /* =================================================================== */
-
/**
- * Refreshes the container by releasing it and having it reallocated
+ * report container loss. If this isn't already known about, react
*
- * @param containerId id of the container to release
- * @param newHostIfPossible allocate the replacement container on a new host
- *
+ * @param containerId id of the container which has failed
* @throws SliderException
*/
- public void refreshContainer(String containerId, boolean newHostIfPossible)
+ public synchronized void providerLostContainer(
+ ContainerId containerId)
throws SliderException {
- log.info(
- "Refreshing container {} per provider request.",
+ log.info("containerLostContactWithProvider: container {} lost",
containerId);
- rmOperationHandler.execute(appState.releaseContainer(containerId));
-
- // ask for more containers if needed
- reviewRequestAndReleaseNodes();
+ RoleInstance activeContainer = appState.getOwnedContainer(containerId);
+ if (activeContainer != null) {
+ executeRMOperations(appState.releaseContainer(containerId));
+ // ask for more containers if needed
+ log.info("Container released; triggering review");
+ reviewRequestAndReleaseNodes();
+ } else {
+ log.info("Container not in active set - ignoring");
+ }
}
/* =================================================================== */
@@ -1499,7 +1654,6 @@
* @param ctx context
* @param instance node details
*/
- @Override // ContainerStartOperation
public void startContainer(Container container,
ContainerLaunchContext ctx,
RoleInstance instance) {
@@ -1527,10 +1681,14 @@
LOG_YARN.info("Started Container {} ", containerId);
RoleInstance cinfo = appState.onNodeManagerContainerStarted(containerId);
if (cinfo != null) {
- LOG_YARN.info("Deployed instance of role {}", cinfo.role);
+ LOG_YARN.info("Deployed instance of role {} onto {}",
+ cinfo.role, containerId);
//trigger an async container status
nmClientAsync.getContainerStatusAsync(containerId,
cinfo.container.getNodeId());
+ // push out a registration
+ queue(new RegisterComponentInstance(containerId, 0, TimeUnit.MILLISECONDS));
+
} else {
//this is a hypothetical path not seen. We react by warning
log.error("Notified of started container that isn't pending {} - releasing",
@@ -1550,7 +1708,7 @@
public void onContainerStatusReceived(ContainerId containerId,
ContainerStatus containerStatus) {
LOG_YARN.debug("Container Status: id={}, status={}", containerId,
- containerStatus);
+ containerStatus);
}
@Override // NMClientAsync.CallbackHandler
@@ -1590,14 +1748,84 @@
}
/**
- * Get the username for the slider cluster as set in the environment
- * @return the username or null if none was set/it is a secure cluster
+ * Queue an action for immediate execution in the executor thread
+ * @param action action to execute
*/
- public String getHadoop_user_name() {
- return hadoop_user_name;
+ public void queue(AsyncAction action) {
+ actionQueues.put(action);
}
/**
+ * Schedule an action
+ * @param action for delayed execution
+ */
+ public void schedule(AsyncAction action) {
+ actionQueues.schedule(action);
+ }
+
+
+ /**
+ * Handle any exception in a thread. If the exception provides an exit
+ * code, that is the one that will be used
+ * @param thread thread throwing the exception
+ * @param exception exception
+ */
+ public void onExceptionInThread(Thread thread, Exception exception) {
+ log.error("Exception in {}: {}", thread.getName(), exception, exception);
+ int exitCode = EXIT_EXCEPTION_THROWN;
+ if (exception instanceof ExitCodeProvider) {
+ exitCode = ((ExitCodeProvider) exception).getExitCode();
+ }
+ signalAMComplete(exitCode, exception.toString());
+ }
+
+  /**
+   * Start the chaos monkey
+   * @return true if it started
+   */
+  private boolean maybeStartMonkey() {
+    MapOperations internals = getGlobalInternalOptions();
+
+    Boolean enabled =
+        internals.getOptionBool(InternalKeys.CHAOS_MONKEY_ENABLED,
+            InternalKeys.DEFAULT_CHAOS_MONKEY_ENABLED);
+    if (!enabled) {
+      log.info("Chaos monkey disabled");
+      return false;
+    }
+    long monkeyInterval = internals.getTimeRange(
+        InternalKeys.CHAOS_MONKEY_INTERVAL,
+        InternalKeys.DEFAULT_CHAOS_MONKEY_INTERVAL_DAYS,
+        InternalKeys.DEFAULT_CHAOS_MONKEY_INTERVAL_HOURS,
+        InternalKeys.DEFAULT_CHAOS_MONKEY_INTERVAL_MINUTES,
+        0);
+    log.info("Adding Chaos Monkey scheduled every {} seconds ({} hours)",
+        monkeyInterval, monkeyInterval/(60*60));
+    monkey = new ChaosMonkeyService(metrics, actionQueues);
+    int amKillProbability = internals.getOptionInt(
+        InternalKeys.CHAOS_MONKEY_PROBABILITY_AM_FAILURE,
+        InternalKeys.DEFAULT_CHAOS_MONKEY_PROBABILITY_AM_FAILURE);
+    if (amKillProbability > 0) {
+      monkey.addTarget("AM killer",
+          new ChaosKillAM(actionQueues, -1), amKillProbability
+      );
+    }
+    int containerKillProbability = internals.getOptionInt(
+        InternalKeys.CHAOS_MONKEY_PROBABILITY_CONTAINER_FAILURE,
+        InternalKeys.DEFAULT_CHAOS_MONKEY_PROBABILITY_CONTAINER_FAILURE);
+    if (containerKillProbability > 0) {
+      monkey.addTarget("Container killer",
+          new ChaosKillContainer(appState, actionQueues, rmOperationHandler),
+          containerKillProbability
+      );
+    }
+    initAndAddService(monkey);
+    // and schedule it
+    schedule(monkey.getChaosAction(monkeyInterval, TimeUnit.SECONDS));
+    return true;
+  }
+
+ /**
* This is the main entry point for the service launcher.
* @param args command line arguments.
*/
@@ -1606,10 +1834,11 @@
//turn the args to a list
List<String> argsList = Arrays.asList(args);
//create a new list, as the ArrayList type doesn't push() on an insert
- List<String> extendedArgs = new ArrayList<>(argsList);
+ List<String> extendedArgs = new ArrayList<String>(argsList);
//insert the service name
extendedArgs.add(0, SERVICE_CLASSNAME);
//now have the service launcher do its work
ServiceLauncher.serviceMain(extendedArgs);
}
+
}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionHalt.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionHalt.java
new file mode 100644
index 0000000..c21e249
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionHalt.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.actions;
+
+import org.apache.hadoop.util.ExitUtil;
+import org.apache.slider.server.appmaster.SliderAppMaster;
+import org.apache.slider.server.appmaster.state.AppState;
+
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Exit a JVM halt.
+ * @see ExitUtil#halt(int, String)
+ */
+public class ActionHalt extends AsyncAction {
+  // Schedules a hard JVM halt (ExitUtil.halt) after the given delay.
+  private final int status;
+  private final String text;
+
+  public ActionHalt(
+      int status,
+      String text,
+      long delay, TimeUnit timeUnit) {
+    super("Halt", delay, timeUnit, ActionAttributes.HALTS_CLUSTER); // honor timeUnit
+    this.status = status;
+    this.text = text;
+  }
+
+  @Override
+  public void execute(SliderAppMaster appMaster,
+      QueueAccess queueService,
+      AppState appState) throws Exception {
+    ExitUtil.halt(status, text);
+  }
+}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionKillContainer.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionKillContainer.java
new file mode 100644
index 0000000..c1e7e6e
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionKillContainer.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.actions;
+
+import com.google.common.base.Preconditions;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.slider.server.appmaster.SliderAppMaster;
+import org.apache.slider.server.appmaster.operations.AbstractRMOperation;
+import org.apache.slider.server.appmaster.operations.ContainerReleaseOperation;
+import org.apache.slider.server.appmaster.operations.RMOperationHandler;
+import org.apache.slider.server.appmaster.state.AppState;
+
+import java.util.LinkedList;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+
+public class ActionKillContainer extends AsyncAction {
+
+ private final ContainerId containerId;
+ private final RMOperationHandler operationHandler;
+ public ActionKillContainer(
+ ContainerId containerId,
+ long delay,
+ TimeUnit timeUnit,
+ RMOperationHandler operationHandler) {
+ super("kill container", delay, timeUnit);
+ this.operationHandler = operationHandler;
+ Preconditions.checkArgument(containerId != null);
+
+ this.containerId = containerId;
+ }
+
+ /**
+ * Get the container ID to kill
+   * @return the ID of the container this action will release
+ */
+ public ContainerId getContainerId() {
+ return containerId;
+ }
+
+ @Override
+ public void execute(SliderAppMaster appMaster,
+ QueueAccess queueService,
+ AppState appState) throws Exception {
+ List<AbstractRMOperation> opsList = new LinkedList<AbstractRMOperation>();
+ ContainerReleaseOperation release = new ContainerReleaseOperation(containerId);
+ opsList.add(release);
+ //now apply the operations
+ operationHandler.execute(opsList);
+ }
+}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStartContainer.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStartContainer.java
new file mode 100644
index 0000000..d95dc74
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStartContainer.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.actions;
+
+import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
+import org.apache.slider.server.appmaster.SliderAppMaster;
+import org.apache.slider.server.appmaster.state.AppState;
+import org.apache.slider.server.appmaster.state.RoleInstance;
+
+import java.util.Locale;
+
+/**
+ * Start a container
+ * @see SliderAppMaster#startContainer(Container, ContainerLaunchContext, RoleInstance)
+ */
+public class ActionStartContainer extends AsyncAction {
+
+ private final Container container;
+ private final ContainerLaunchContext ctx;
+ private final RoleInstance instance;
+
+ public ActionStartContainer(String name,
+ long delay,
+ Container container,
+ ContainerLaunchContext ctx,
+ RoleInstance instance) {
+ super(
+ String.format(Locale.ENGLISH,
+ "%s %s: /",
+ name , container.getId().toString()),
+ delay);
+ this.container = container;
+ this.ctx = ctx;
+ this.instance = instance;
+ }
+
+ @Override
+ public void execute(SliderAppMaster appMaster,
+ QueueAccess queueService,
+ AppState appState) throws Exception {
+ appMaster.startContainer(container, ctx, instance);
+ }
+}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStopQueue.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStopQueue.java
new file mode 100644
index 0000000..66a3961
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStopQueue.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.actions;
+
+import org.apache.slider.server.appmaster.SliderAppMaster;
+import org.apache.slider.server.appmaster.state.AppState;
+
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Action to tell a queue executor to stop -after handing this on/executing it
+ */
+public class ActionStopQueue extends AsyncAction {
+
+ public ActionStopQueue(long delay) {
+ super("stop queue", delay);
+ }
+
+ public ActionStopQueue(long delay,
+ TimeUnit timeUnit) {
+ super("stop queue", delay, timeUnit);
+ }
+
+ public ActionStopQueue(String name,
+ long delay,
+ TimeUnit timeUnit) {
+ super(name, delay, timeUnit);
+ }
+
+ @Override
+ public void execute(SliderAppMaster appMaster,
+ QueueAccess queueService,
+ AppState appState) throws Exception {
+ // no-op
+ }
+}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStopSlider.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStopSlider.java
new file mode 100644
index 0000000..f084383
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStopSlider.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.actions;
+
+import org.apache.slider.core.main.LauncherExitCodes;
+import org.apache.slider.server.appmaster.SliderAppMaster;
+import org.apache.slider.server.appmaster.state.AppState;
+
+import java.util.concurrent.TimeUnit;
+
+public class ActionStopSlider extends AsyncAction {
+ public ActionStopSlider(String message,
+ long delay) {
+ super(message, delay, ActionAttributes.HALTS_CLUSTER);
+ }
+
+ public ActionStopSlider(String name,
+ long delay,
+ TimeUnit timeUnit) {
+ super(name, delay, timeUnit, ActionAttributes.HALTS_CLUSTER);
+ }
+
+ @Override
+ public void execute(SliderAppMaster appMaster,
+ QueueAccess queueService,
+ AppState appState) throws Exception {
+ String message = name;
+ SliderAppMaster.getLog().info("SliderAppMasterApi.stopCluster: {}",
+ message);
+ appMaster.signalAMComplete(
+ LauncherExitCodes.EXIT_CLIENT_INITIATED_SHUTDOWN,
+ message);
+ }
+}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/AsyncAction.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/AsyncAction.java
new file mode 100644
index 0000000..996390d
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/AsyncAction.java
@@ -0,0 +1,159 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.actions;
+
+import org.apache.slider.common.tools.SliderUtils;
+import org.apache.slider.server.appmaster.SliderAppMaster;
+import org.apache.slider.server.appmaster.state.AppState;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.EnumSet;
+import java.util.concurrent.Delayed;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicLong;
+
+public abstract class AsyncAction implements Delayed {
+
+ private static final AtomicLong sequencer = new AtomicLong(0);
+
+ public final String name;
+ private long nanos;
+ private final EnumSet<ActionAttributes> attrs;
+ private final long sequenceNumber = sequencer.incrementAndGet();
+
+
+ protected AsyncAction(String name) {
+ this(name, 0);
+ }
+
+ protected AsyncAction(String name,
+ long delayMillis) {
+ this(name, delayMillis, TimeUnit.MILLISECONDS);
+ }
+
+ protected AsyncAction(String name,
+ long delay,
+ TimeUnit timeUnit) {
+ this.name = name;
+ this.setNanos(convertAndOffset(delay, timeUnit));
+ attrs = EnumSet.noneOf(ActionAttributes.class);
+ }
+
+ protected AsyncAction(String name,
+ long delay,
+ TimeUnit timeUnit,
+ EnumSet<ActionAttributes> attrs) {
+ this.name = name;
+ this.setNanos(convertAndOffset(delay, timeUnit));
+ this.attrs = attrs;
+ }
+
+ protected AsyncAction(String name,
+ long delay,
+ TimeUnit timeUnit,
+ ActionAttributes... attributes) {
+ this(name, delay, timeUnit);
+ Collections.addAll(attrs, attributes);
+ }
+
+  protected AsyncAction(String name,
+      long delayMillis,
+      ActionAttributes... attributes) {
+    this(name, delayMillis, TimeUnit.MILLISECONDS, attributes); // was dropping attributes
+  }
+
+ protected long convertAndOffset(long delay, TimeUnit timeUnit) {
+ return now() + TimeUnit.NANOSECONDS.convert(delay, timeUnit);
+ }
+
+ /**
+ * The current time in nanos
+ * @return now
+ */
+ protected long now() {
+ return System.nanoTime();
+ }
+
+ @Override
+ public long getDelay(TimeUnit unit) {
+ return unit.convert(getNanos() - now(), TimeUnit.NANOSECONDS);
+ }
+
+ @Override
+ public int compareTo(Delayed that) {
+ if (this == that) {
+ return 0;
+ }
+ return SliderUtils.compareTo(
+ getDelay(TimeUnit.NANOSECONDS),
+ that.getDelay(TimeUnit.NANOSECONDS));
+ }
+
+ @Override
+ public String toString() {
+ final StringBuilder sb =
+ new StringBuilder(super.toString());
+ sb.append(" name='").append(name).append('\'');
+ sb.append(", nanos=").append(getNanos());
+ sb.append(", attrs=").append(attrs);
+ sb.append(", sequenceNumber=").append(sequenceNumber);
+ sb.append('}');
+ return sb.toString();
+ }
+
+ protected EnumSet<ActionAttributes> getAttrs() {
+ return attrs;
+ }
+
+ /**
+ * Ask if an action has a specific attribute
+ * @param attr attribute
+ * @return true iff the action has the specific attribute
+ */
+ public boolean hasAttr(ActionAttributes attr) {
+ return attrs.contains(attr);
+ }
+
+ /**
+ * Actual application
+ * @param appMaster
+ * @param queueService
+ * @param appState
+ * @throws IOException
+ */
+ public abstract void execute(SliderAppMaster appMaster,
+ QueueAccess queueService, AppState appState) throws Exception;
+
+ public long getNanos() {
+ return nanos;
+ }
+
+ public void setNanos(long nanos) {
+ this.nanos = nanos;
+ }
+
+ public enum ActionAttributes {
+ SHRINKS_CLUSTER,
+ EXPANDS_CLUSTER,
+ HALTS_CLUSTER,
+ }
+
+
+}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ProviderReportedContainerLoss.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ProviderReportedContainerLoss.java
new file mode 100644
index 0000000..2aa67bb
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ProviderReportedContainerLoss.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.actions;
+
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.slider.server.appmaster.SliderAppMaster;
+import org.apache.slider.server.appmaster.state.AppState;
+
+/**
+ * Report container loss to the AM
+ * {@link SliderAppMaster#providerLostContainer(ContainerId)}
+ */
+public class ProviderReportedContainerLoss extends AsyncAction {
+
+ private final ContainerId containerId;
+
+ /**
+ * Create the loss report.
+ * @param containerId ID of the lost container
+ */
+ public ProviderReportedContainerLoss(ContainerId containerId) {
+ super("lost container " + containerId);
+ this.containerId = containerId;
+ }
+
+ /**
+ * Create a delayed loss report.
+ * @param containerId ID of the lost container
+ * @param delayMillis delay in milliseconds before the action is executed
+ */
+ public ProviderReportedContainerLoss(
+ ContainerId containerId, long delayMillis) {
+ super("lost container " + containerId, delayMillis);
+ this.containerId = containerId;
+ }
+
+ @Override
+ public void execute(SliderAppMaster appMaster,
+ QueueAccess queueService,
+ AppState appState) throws Exception {
+ appMaster.providerLostContainer(containerId);
+ }
+}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ProviderStartupCompleted.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ProviderStartupCompleted.java
new file mode 100644
index 0000000..4577025
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ProviderStartupCompleted.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.actions;
+
+import org.apache.slider.server.appmaster.SliderAppMaster;
+import org.apache.slider.server.appmaster.state.AppState;
+
+/**
+ * Action signalling that the provider's startup has completed: on
+ * execution it invokes the AM's {@code eventCallbackEvent} with a
+ * {@code null} argument.
+ */
+public class ProviderStartupCompleted extends AsyncAction {
+
+ public ProviderStartupCompleted() {
+ super("ProviderStartupCompleted");
+ }
+
+ /**
+ * @param delayMillis delay in milliseconds before execution
+ */
+ public ProviderStartupCompleted(long delayMillis) {
+ super("ProviderStartupCompleted", delayMillis);
+ }
+
+ @Override
+ public void execute(SliderAppMaster appMaster,
+ QueueAccess queueService,
+ AppState appState) throws Exception {
+ appMaster.eventCallbackEvent(null);
+ }
+}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/QueueAccess.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/QueueAccess.java
new file mode 100644
index 0000000..cffaf5e
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/QueueAccess.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.actions;
+
+/**
+ * Access for queue operations
+ */
+public interface QueueAccess {
+ /**
+ * Put an action on the immediate queue -to be executed when the queue
+ * reaches it.
+ * @param action action to queue
+ */
+ void put(AsyncAction action);
+
+ /**
+ * Put a delayed action: this will only be added to the main queue
+ * after its action time has been reached
+ * @param action action to queue
+ */
+ void schedule(AsyncAction action);
+
+ /**
+ * Remove an action from the queues.
+ * @param action action to remove
+ * @return true if the action was removed
+ */
+ boolean remove(AsyncAction action);
+
+ /**
+ * Add a named renewing action, replacing any existing action registered
+ * under the same name.
+ * @param name name to register the action under
+ * @param renewingAction wrapped action
+ */
+ void renewing(String name,
+ RenewingAction<? extends AsyncAction> renewingAction);
+
+ /**
+ * Look up a renewing action
+ * @param name name of the action
+ * @return the action or null if none was found
+ */
+ RenewingAction<? extends AsyncAction> lookupRenewingAction(String name);
+
+ /**
+ * Remove a renewing action
+ * @param name name of the action
+ * @return true if the action was found and removed.
+ */
+ boolean removeRenewingAction(String name);
+}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/QueueExecutor.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/QueueExecutor.java
new file mode 100644
index 0000000..87956db
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/QueueExecutor.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.actions;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import org.apache.slider.server.appmaster.SliderAppMaster;
+import org.apache.slider.server.appmaster.state.AppState;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Executor for async actions - hands them off to the AM as
+ * appropriate
+ */
+public class QueueExecutor implements Runnable {
+ private static final Logger log =
+ LoggerFactory.getLogger(QueueExecutor.class);
+
+ private final SliderAppMaster appMaster;
+ private final QueueService actionQueues;
+ private final AppState appState;
+
+ /**
+ * Create an executor bound to an AM; the app state is obtained
+ * from the AM itself.
+ * @param appMaster the AM (must not be null)
+ * @param actionQueues queues to drain (must not be null)
+ */
+ public QueueExecutor(SliderAppMaster appMaster,
+ QueueService actionQueues) {
+ Preconditions.checkNotNull(appMaster);
+ Preconditions.checkNotNull(actionQueues);
+
+ this.appMaster = appMaster;
+ this.actionQueues = actionQueues;
+ this.appState = appMaster.getAppState();
+ }
+
+ /**
+ * Test-only constructor: no AM or app state is bound, so actions are
+ * executed with null for both of those arguments.
+ * @param actionQueues queues to drain (must not be null)
+ */
+ @VisibleForTesting
+ QueueExecutor(QueueService actionQueues) {
+ Preconditions.checkNotNull(actionQueues);
+ this.appMaster = null;
+ this.appState = null;
+ this.actionQueues = actionQueues;
+ }
+
+ /**
+ * Run until the queue has been told to stop.
+ * Takes actions off the immediate queue and executes them until an
+ * {@link ActionStopQueue} instance has been executed. Note that an
+ * exception raised by ANY action ends processing of the whole queue;
+ * it is forwarded to the AM's thread-exception handler when an AM
+ * is bound.
+ */
+ @Override
+ public void run() {
+ AsyncAction take = null;
+ try {
+ log.info("Queue Executor run() started");
+ do {
+ take = actionQueues.actionQueue.take();
+ log.debug("Executing {}", take);
+
+ take.execute(appMaster, actionQueues, appState);
+ } while (!(take instanceof ActionStopQueue));
+ log.info("Queue Executor run() stopped");
+ } catch (Exception e) {
+ log.error("Exception processing {}: {}", take, e, e);
+ if (appMaster != null) {
+ appMaster.onExceptionInThread(Thread.currentThread(), e);
+ }
+ }
+ }
+}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/QueueService.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/QueueService.java
new file mode 100644
index 0000000..6ad579d
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/QueueService.java
@@ -0,0 +1,173 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.actions;
+
+
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.slider.server.services.workflow.ServiceThreadFactory;
+import org.apache.slider.server.services.workflow.WorkflowExecutorService;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Iterator;
+import java.util.Map;
+import java.util.concurrent.BlockingDeque;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.DelayQueue;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.LinkedBlockingDeque;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * The Queue service provides immediate and scheduled queues, as well
+ * as an executor thread that moves queued actions from the scheduled
+ * queue to the immediate one.
+ *
+ * <p>
+ * This code to be revisited to see if all that was needed is the single scheduled
+ * queue, implicitly making actions immediate by giving them an execution
+ * time of "now". It would force having a sequence number to all actions, one
+ * which the queue would have to set from its (monotonic, thread-safe) counter
+ * on every submission, with a modified comparison operator. This would guarantee
+ * that earlier submissions were picked before later ones.
+ */
+public class QueueService extends WorkflowExecutorService<ExecutorService>
+implements Runnable, QueueAccess {
+ private static final Logger log =
+ LoggerFactory.getLogger(QueueService.class);
+ public static final String NAME = "Action Queue";
+
+ /**
+ * Immediate actions. Consumed by {@link QueueExecutor}.
+ */
+ public final BlockingDeque<AsyncAction> actionQueue =
+ new LinkedBlockingDeque<AsyncAction>();
+
+ /**
+ * Actions to be scheduled in the future; drained into the immediate
+ * queue by this service's own {@link #run()} loop.
+ */
+ public final DelayQueue<AsyncAction> scheduledActions = new DelayQueue<AsyncAction>();
+
+ /**
+ * Map of renewing actions by name ... this is to allow them to
+ * be cancelled by name
+ */
+ private final Map<String, RenewingAction<? extends AsyncAction>> renewingActions
+ = new ConcurrentHashMap<String, RenewingAction<? extends AsyncAction>>();
+
+ /**
+ * Create a queue instance with a single thread executor
+ */
+ public QueueService() {
+ super(NAME,
+ ServiceThreadFactory.singleThreadExecutor(NAME, true));
+ }
+
+ @Override
+ public void put(AsyncAction action) {
+ log.debug("Queueing {}", action);
+ actionQueue.add(action);
+ }
+
+ @Override
+ public void schedule(AsyncAction action) {
+ log.debug("Scheduling {}", action);
+ scheduledActions.add(action);
+ }
+
+ // Removes from both queues; true if it was found in either.
+ @Override
+ public boolean remove(AsyncAction action) {
+ boolean removedFromDelayQueue = scheduledActions.remove(action);
+ boolean removedFromActions = actionQueue.remove(action);
+ return removedFromActions || removedFromDelayQueue;
+ }
+
+ @Override
+ public void renewing(String name,
+ RenewingAction<? extends AsyncAction> renewingAction) {
+ log.debug("Adding renewing Action \"{}\": {}", name,
+ renewingAction.getAction());
+ if (removeRenewingAction(name)) {
+ log.debug("Removed predecessor action");
+ }
+ renewingActions.put(name, renewingAction);
+ schedule(renewingAction);
+ }
+
+ @Override
+ public RenewingAction<? extends AsyncAction> lookupRenewingAction(String name) {
+ return renewingActions.get(name);
+ }
+
+ /**
+ * Remove a renewing action from the map and from the queues.
+ * Note: returns true only if the action was registered under that name
+ * AND was still present in one of the queues to be removed from.
+ */
+ @Override
+ public boolean removeRenewingAction(String name) {
+ RenewingAction<? extends AsyncAction> action = renewingActions.remove(name);
+ return action != null && remove(action);
+ }
+
+ /**
+ * Stop the service by scheduling an {@link ActionStopQueue} action
+ * ..if the processor thread is working this will propagate through
+ * and stop the queue handling after all other actions complete.
+ * The stop action is scheduled (with zero delay) rather than put
+ * directly, so it passes through this service's run() loop first.
+ * @throws Exception
+ */
+ @Override
+ protected void serviceStop() throws Exception {
+ ActionStopQueue stopQueue = new ActionStopQueue("serviceStop: "+ this,
+ 0, TimeUnit.MILLISECONDS);
+ schedule(stopQueue);
+ super.serviceStop();
+ }
+
+ /**
+ * Flush an action queue of all types of a specific action
+ * @param clazz the exact class of the actions to remove (class equality,
+ * not instanceof, is used)
+ */
+ protected void flushActionQueue(Class<? extends AsyncAction> clazz) {
+ Iterator<AsyncAction> iterator =
+ actionQueue.descendingIterator();
+ while (iterator.hasNext()) {
+ AsyncAction next = iterator.next();
+ if (next.getClass().equals(clazz)) {
+ iterator.remove();
+ }
+ }
+ }
+
+ /**
+ * Run until the queue has been told to stop.
+ * Moves actions whose delay has expired from the scheduled queue to the
+ * immediate queue; an {@link ActionStopQueue} is propagated (so the
+ * executor sees it too) and then terminates this loop. Interruption
+ * also ends the loop.
+ */
+ @Override
+ public void run() {
+ try {
+
+ log.info("QueueService processor started");
+
+ AsyncAction take;
+ do {
+ take = scheduledActions.take();
+ log.debug("Propagating {}", take);
+ actionQueue.put(take);
+ } while (!(take instanceof ActionStopQueue));
+ log.info("QueueService processor terminated");
+ } catch (InterruptedException e) {
+ //game over
+ }
+ }
+}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/RegisterComponentInstance.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/RegisterComponentInstance.java
new file mode 100644
index 0000000..a8a6fe2
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/RegisterComponentInstance.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.actions;
+
+import com.google.common.base.Preconditions;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.slider.server.appmaster.SliderAppMaster;
+import org.apache.slider.server.appmaster.state.AppState;
+
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Notify the AM that a component instance (identified by its container ID)
+ * should be registered: calls
+ * {@link SliderAppMaster#registerComponent(ContainerId)}.
+ */
+public class RegisterComponentInstance extends AsyncAction {
+
+
+ public final ContainerId containerId;
+
+ /**
+ * @param containerId ID of the container to register; must not be null
+ * @param delay delay before execution
+ * @param timeUnit unit of the delay
+ */
+ public RegisterComponentInstance(ContainerId containerId, long delay,
+ TimeUnit timeUnit) {
+ super("RegisterComponentInstance :" + containerId,
+ delay, timeUnit);
+ Preconditions.checkArgument(containerId != null);
+ this.containerId = containerId;
+ }
+
+ @Override
+ public void execute(SliderAppMaster appMaster,
+ QueueAccess queueService,
+ AppState appState) throws Exception {
+
+ appMaster.registerComponent(containerId);
+ }
+}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/RenewingAction.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/RenewingAction.java
new file mode 100644
index 0000000..c62582f
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/RenewingAction.java
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.actions;
+
+import com.google.common.base.Preconditions;
+import org.apache.slider.server.appmaster.SliderAppMaster;
+import org.apache.slider.server.appmaster.state.AppState;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+
+/**
+ * This action executes then reschedules an inner action; a limit
+ * can specify the number of times to run
+ */
+
+public class RenewingAction<A extends AsyncAction> extends AsyncAction {
+ private static final Logger log =
+ LoggerFactory.getLogger(RenewingAction.class);
+ private final A action;
+ private final long interval;
+ private final TimeUnit timeUnit;
+ public final AtomicInteger executionCount = new AtomicInteger();
+ public final int limit;
+
+
+ /**
+ * Rescheduling action
+ * @param action action to execute
+ * @param initialDelay initial delay
+ * @param interval interval for later delays
+ * @param timeUnit time unit for all times
+ * @param limit limit on the no. of executions. If 0 or less: no limit
+ */
+ public RenewingAction(A action,
+ long initialDelay,
+ long interval,
+ TimeUnit timeUnit,
+ int limit) {
+ super("renewing " + action.name, initialDelay, timeUnit, action.getAttrs());
+ // NOTE(review): the super() call above already dereferences
+ // action (action.name, action.getAttrs()), so a null action fails
+ // there with an NPE before this precondition is ever evaluated.
+ Preconditions.checkArgument(action != null, "null actions");
+ this.action = action;
+ this.interval = interval;
+ this.timeUnit = timeUnit;
+ this.limit = limit;
+ }
+
+ /**
+ * Execute the inner action then reschedule this wrapper.
+ * With a positive limit the inner action runs exactly {@code limit} times.
+ * @param appMaster the AM, passed to the inner action
+ * @param queueService queue on which this wrapper reschedules itself
+ * @param appState application state, passed to the inner action
+ * @throws Exception anything raised by the inner action
+ */
+ @Override
+ public void execute(SliderAppMaster appMaster,
+ QueueAccess queueService,
+ AppState appState)
+ throws Exception {
+ long exCount = executionCount.incrementAndGet();
+ log.debug("{}: Executing inner action count # {}", this, exCount);
+ action.execute(appMaster, queueService, appState);
+ boolean reschedule = true;
+ if (limit > 0) {
+ reschedule = limit > exCount;
+ }
+ if (reschedule) {
+ this.setNanos(convertAndOffset(interval, timeUnit));
+ log.debug("{}: rescheduling, new offset {} mS ", this,
+ getDelay(TimeUnit.MILLISECONDS));
+ queueService.schedule(this);
+ }
+ }
+
+ /**
+ * Get the action
+ * @return the wrapped inner action
+ */
+ public A getAction() {
+ return action;
+ }
+
+ public long getInterval() {
+ return interval;
+ }
+
+ public TimeUnit getTimeUnit() {
+ return timeUnit;
+ }
+
+ public int getExecutionCount() {
+ return executionCount.get();
+ }
+
+ public int getLimit() {
+ return limit;
+ }
+}
diff --git a/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/TestStub.groovy b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ResetFailureWindow.java
similarity index 61%
rename from slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/TestStub.groovy
rename to slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ResetFailureWindow.java
index f683ded..28bcf55 100644
--- a/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/TestStub.groovy
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ResetFailureWindow.java
@@ -16,17 +16,24 @@
* limitations under the License.
*/
-package org.apache.slider.providers.hbase
+package org.apache.slider.server.appmaster.actions;
-import org.junit.Test
+import org.apache.slider.server.appmaster.SliderAppMaster;
+import org.apache.slider.server.appmaster.state.AppState;
/**
- * this is here to ensure there is always a test
+ * Requests the AM to reset the failure window
*/
-class TestStub {
+public class ResetFailureWindow extends AsyncAction {
- @Test
- public void testStubTest() throws Throwable {
+ public ResetFailureWindow() {
+ super("ResetFailureWindow");
+ }
+ @Override
+ public void execute(SliderAppMaster appMaster,
+ QueueAccess queueService,
+ AppState appState) throws Exception {
+ appState.resetFailureCounts();
}
}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/UnregisterComponentInstance.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/UnregisterComponentInstance.java
new file mode 100644
index 0000000..78d9c1c
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/UnregisterComponentInstance.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.actions;
+
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.slider.server.appmaster.SliderAppMaster;
+import org.apache.slider.server.appmaster.state.AppState;
+
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Tell the AM to unregister the component instance of a container:
+ * calls {@link SliderAppMaster#unregisterComponent(ContainerId)}.
+ * The container ID must be non-null: it is dereferenced
+ * ({@code containerId.toString()}) in the constructor.
+ */
+public class UnregisterComponentInstance extends AsyncAction {
+
+
+ public final ContainerId containerId;
+
+ /**
+ * @param containerId ID of the container to unregister; must not be null
+ * @param delay delay before execution
+ * @param timeUnit unit of the delay
+ */
+ public UnregisterComponentInstance(ContainerId containerId, long delay,
+ TimeUnit timeUnit) {
+ super("UnregisterComponentInstance :" + containerId.toString(),
+ delay, timeUnit);
+ this.containerId = containerId;
+ }
+
+ @Override
+ public void execute(SliderAppMaster appMaster,
+ QueueAccess queueService,
+ AppState appState) throws Exception {
+ appMaster.unregisterComponent(containerId);
+
+ }
+}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosEntry.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosEntry.java
new file mode 100644
index 0000000..5905d2f
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosEntry.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.monkey;
+
+import com.codahale.metrics.Counter;
+import com.codahale.metrics.MetricRegistry;
+import com.google.common.base.Preconditions;
+import org.apache.commons.lang.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Entry in the chaos list
+ */
+public class ChaosEntry {
+
+ protected static final Logger log =
+ LoggerFactory.getLogger(ChaosEntry.class);
+ public final String name;
+ public final ChaosTarget target;
+ public final long probability;
+
+ private final MetricRegistry metrics;
+ private final Counter invocationCounter;
+
+
+ /**
+ * Constructor -includes validation of all arguments
+ * @param name non-empty name of the entry
+ * @param target non-null target to invoke
+ * @param probability probability in the range (0, ChaosMonkeyService.PERCENT_100]
+ * @param metrics registry in which the invocation counter is created
+ */
+ public ChaosEntry(String name, ChaosTarget target, long probability,
+ MetricRegistry metrics) {
+ Preconditions.checkArgument(!StringUtils.isEmpty(name), "missing name");
+ Preconditions.checkArgument(target != null, "null target");
+ // NOTE(review): message says "negative" but this check also rejects zero
+ Preconditions.checkArgument(probability > 0, "negative probability");
+ Preconditions.checkArgument(probability <= ChaosMonkeyService.PERCENT_100,
+ "probability over 100%");
+ this.name = name;
+ this.target = target;
+ this.probability = probability;
+ this.metrics = metrics;
+ invocationCounter =
+ metrics.counter(MetricRegistry.name(ChaosEntry.class, name));
+ }
+
+ /**
+ * Trigger the chaos action: log, increment the invocation counter,
+ * then invoke the target.
+ */
+ public void invokeChaos() {
+ log.info("Invoking {}", name);
+ invocationCounter.inc();
+ target.chaosAction();
+ }
+
+ /**
+ * Invoke Chaos if the trigger value is in range of the probability
+ * @param value trigger value, 0-10K
+ * @return true if the chaos method was invoked
+ */
+ public boolean maybeInvokeChaos(long value) {
+ log.debug("Probability {} trigger={}", probability, value);
+ if (value < probability) {
+ invokeChaos();
+ return true;
+ }
+ return false;
+ }
+}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosKillAM.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosKillAM.java
new file mode 100644
index 0000000..3c1a914
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosKillAM.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.monkey;
+
+import org.apache.slider.server.appmaster.actions.ActionHalt;
+import org.apache.slider.server.appmaster.actions.QueueAccess;
+
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Kill the AM
+ */
+public class ChaosKillAM implements ChaosTarget {
+
+ /** Delay in milliseconds before the scheduled halt executes. */
+ public static final int DELAY = 1000;
+ private final QueueAccess queues;
+ // exit code handed to the ActionHalt when the AM is halted
+ private final int exitCode;
+
+ public ChaosKillAM(QueueAccess queues, int exitCode) {
+ this.queues = queues;
+ this.exitCode = exitCode;
+ }
+
+ /**
+ * Trigger a delayed halt
+ */
+ @Override
+ public void chaosAction() {
+ queues.schedule(new ActionHalt(exitCode, "Chaos invoked halt", DELAY,
+ TimeUnit.MILLISECONDS));
+ }
+}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosKillContainer.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosKillContainer.java
new file mode 100644
index 0000000..daf2590
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosKillContainer.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.monkey;
+
+import com.google.common.base.Preconditions;
+import org.apache.slider.server.appmaster.actions.ActionKillContainer;
+import org.apache.slider.server.appmaster.actions.QueueAccess;
+import org.apache.slider.server.appmaster.operations.RMOperationHandler;
+import org.apache.slider.server.appmaster.state.AppState;
+import org.apache.slider.server.appmaster.state.RoleInstance;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.List;
+import java.util.Random;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Kill a container
+ */
+public class ChaosKillContainer implements ChaosTarget {
+ protected static final Logger log =
+ LoggerFactory.getLogger(ChaosKillContainer.class);
+ /** Delay in milliseconds before the scheduled kill executes. */
+ public static final int DELAY = 100;
+ private final AppState appState;
+ private final QueueAccess queues;
+ private final Random random = new Random();
+ private final RMOperationHandler operationHandler;
+
+ public ChaosKillContainer(AppState appState,
+ QueueAccess queues,
+ RMOperationHandler operationHandler) {
+ Preconditions.checkNotNull(appState);
+ Preconditions.checkNotNull(queues);
+ // NOTE(review): operationHandler is not null-checked, unlike the others
+ this.appState = appState;
+ this.queues = queues;
+ this.operationHandler = operationHandler;
+ }
+
+ /**
+ * Trigger a container kill: choose one container uniformly at random
+ * from a snapshot of the live containers and schedule a delayed
+ * {@link ActionKillContainer} for it. No-op if there are no live
+ * containers.
+ */
+ @Override
+ public void chaosAction() {
+ List<RoleInstance> liveContainers =
+ appState.cloneLiveContainerInfoList();
+ int size = liveContainers.size();
+ if (size == 0) {
+ log.info("No containers to kill");
+ return;
+ }
+ int target = random.nextInt(size);
+ RoleInstance roleInstance = liveContainers.get(target);
+ log.info("Killing {}", roleInstance);
+
+ queues.schedule(new ActionKillContainer(roleInstance.getId(),
+ DELAY, TimeUnit.MILLISECONDS, operationHandler));
+ }
+}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosMonkeyService.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosMonkeyService.java
new file mode 100644
index 0000000..592889c
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosMonkeyService.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.monkey;
+
+import com.codahale.metrics.MetricRegistry;
+import org.apache.hadoop.service.AbstractService;
+import org.apache.slider.server.appmaster.actions.QueueAccess;
+import org.apache.slider.server.appmaster.actions.RenewingAction;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * A chaos monkey service which will invoke ChaosTarget events
+ */
+public class ChaosMonkeyService extends AbstractService {
+ protected static final Logger log =
+ LoggerFactory.getLogger(ChaosMonkeyService.class);
+ public static final int PERCENT_1 = 100;
+ public static final double PERCENT_1D = 100.0;
+
+ /**
+ * the percentage value as multiplied up
+ */
+ public static final int PERCENT_100 = 100 * PERCENT_1;
+ private final MetricRegistry metrics;
+ private final QueueAccess queues;
+ private final Random random = new Random();
+
+ private static final List<ChaosEntry> chaosEntries =
+ new ArrayList<ChaosEntry>();
+
+ public ChaosMonkeyService(MetricRegistry metrics, QueueAccess queues) {
+ super("ChaosMonkeyService");
+ this.metrics = metrics;
+ this.queues = queues;
+ }
+
+
+ public synchronized void addTarget(String name,
+ ChaosTarget target, long probability) {
+ log.info("Adding {} with probability {}", name, probability / PERCENT_1);
+ chaosEntries.add(new ChaosEntry(name, target, probability, metrics));
+ }
+
+ /**
+ * Iterate through all the entries and invoke chaos on those wanted
+ */
+ public void play() {
+ for (ChaosEntry chaosEntry : chaosEntries) {
+ long p = random.nextInt(PERCENT_100);
+ chaosEntry.maybeInvokeChaos(p);
+ }
+ }
+
+ public RenewingAction<MonkeyPlayAction> getChaosAction(long time, TimeUnit timeUnit) {
+ RenewingAction<MonkeyPlayAction> action = new RenewingAction<MonkeyPlayAction>(
+ new MonkeyPlayAction(this, 0, TimeUnit.MILLISECONDS),
+ time,
+ time,
+ timeUnit,
+ 0
+ );
+ return action;
+ }
+}
diff --git a/slider-funtest/src/main/java/org/apache/slider/funtest/accumulo/StubToForceGroovySrcToCompile.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosTarget.java
similarity index 87%
copy from slider-funtest/src/main/java/org/apache/slider/funtest/accumulo/StubToForceGroovySrcToCompile.java
copy to slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosTarget.java
index eefccbb..1c3a9ac 100644
--- a/slider-funtest/src/main/java/org/apache/slider/funtest/accumulo/StubToForceGroovySrcToCompile.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosTarget.java
@@ -16,7 +16,9 @@
* limitations under the License.
*/
-package org.apache.slider.funtest.accumulo;
+package org.apache.slider.server.appmaster.monkey;
-class StubToForceGroovySrcToCompile {
+public interface ChaosTarget {
+
+ public void chaosAction();
}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/MonkeyPlayAction.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/MonkeyPlayAction.java
new file mode 100644
index 0000000..20e4466
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/MonkeyPlayAction.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.monkey;
+
+import org.apache.slider.server.appmaster.SliderAppMaster;
+import org.apache.slider.server.appmaster.actions.AsyncAction;
+import org.apache.slider.server.appmaster.actions.QueueAccess;
+import org.apache.slider.server.appmaster.state.AppState;
+
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Queueable action which calls {@link ChaosMonkeyService#play()} when
+ * executed.
+ */
+public class MonkeyPlayAction extends AsyncAction {
+
+ private final ChaosMonkeyService monkey;
+
+ public MonkeyPlayAction(ChaosMonkeyService monkey, long delay,
+ TimeUnit timeUnit) {
+ super("chaos monkey", delay, timeUnit);
+ this.monkey = monkey;
+ }
+
+ @Override
+ public void execute(SliderAppMaster appMaster,
+ QueueAccess queueService,
+ AppState appState) throws Exception {
+ monkey.play();
+ }
+}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AbstractRMOperation.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/AbstractRMOperation.java
similarity index 84%
rename from slider-core/src/main/java/org/apache/slider/server/appmaster/state/AbstractRMOperation.java
rename to slider-core/src/main/java/org/apache/slider/server/appmaster/operations/AbstractRMOperation.java
index e3e595f..2c55215 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AbstractRMOperation.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/AbstractRMOperation.java
@@ -16,16 +16,14 @@
* limitations under the License.
*/
-package org.apache.slider.server.appmaster.state;
+package org.apache.slider.server.appmaster.operations;
-public class AbstractRMOperation {
+public abstract class AbstractRMOperation {
/**
* Execute the operation
* @param asyncRMClient client
*/
- public void execute(RMOperationHandler handler) {
-
- }
+ public abstract void execute(RMOperationHandler handler);
}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/AsyncRMOperationHandler.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/AsyncRMOperationHandler.java
similarity index 93%
rename from slider-core/src/main/java/org/apache/slider/server/appmaster/AsyncRMOperationHandler.java
rename to slider-core/src/main/java/org/apache/slider/server/appmaster/operations/AsyncRMOperationHandler.java
index 171c021..f7a95a7 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/AsyncRMOperationHandler.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/AsyncRMOperationHandler.java
@@ -16,12 +16,11 @@
* limitations under the License.
*/
-package org.apache.slider.server.appmaster;
+package org.apache.slider.server.appmaster.operations;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.client.api.AMRMClient;
import org.apache.hadoop.yarn.client.api.async.AMRMClientAsync;
-import org.apache.slider.server.appmaster.state.RMOperationHandler;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -45,6 +44,7 @@
}
@Override
+ @SuppressWarnings("unchecked")
public void addContainerRequest(AMRMClient.ContainerRequest req) {
client.addContainerRequest(req);
}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/ContainerReleaseOperation.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/ContainerReleaseOperation.java
similarity index 95%
rename from slider-core/src/main/java/org/apache/slider/server/appmaster/state/ContainerReleaseOperation.java
rename to slider-core/src/main/java/org/apache/slider/server/appmaster/operations/ContainerReleaseOperation.java
index 8e73f19..3d2016b 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/ContainerReleaseOperation.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/ContainerReleaseOperation.java
@@ -16,7 +16,7 @@
* limitations under the License.
*/
-package org.apache.slider.server.appmaster.state;
+package org.apache.slider.server.appmaster.operations;
import org.apache.hadoop.yarn.api.records.ContainerId;
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/ContainerRequestOperation.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/ContainerRequestOperation.java
similarity index 95%
rename from slider-core/src/main/java/org/apache/slider/server/appmaster/state/ContainerRequestOperation.java
rename to slider-core/src/main/java/org/apache/slider/server/appmaster/operations/ContainerRequestOperation.java
index 25c3d60..711bb98 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/ContainerRequestOperation.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/ContainerRequestOperation.java
@@ -16,7 +16,7 @@
* limitations under the License.
*/
-package org.apache.slider.server.appmaster.state;
+package org.apache.slider.server.appmaster.operations;
import org.apache.hadoop.yarn.client.api.AMRMClient;
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/AsyncRMOperationHandler.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/ProviderNotifyingOperationHandler.java
similarity index 60%
copy from slider-core/src/main/java/org/apache/slider/server/appmaster/AsyncRMOperationHandler.java
copy to slider-core/src/main/java/org/apache/slider/server/appmaster/operations/ProviderNotifyingOperationHandler.java
index 171c021..a24d9e5 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/AsyncRMOperationHandler.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/ProviderNotifyingOperationHandler.java
@@ -16,36 +16,28 @@
* limitations under the License.
*/
-package org.apache.slider.server.appmaster;
+package org.apache.slider.server.appmaster.operations;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.client.api.AMRMClient;
-import org.apache.hadoop.yarn.client.api.async.AMRMClientAsync;
-import org.apache.slider.server.appmaster.state.RMOperationHandler;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
+import org.apache.slider.providers.ProviderService;
-/**
- * Hands off RM operations to the Resource Manager
- */
-public class AsyncRMOperationHandler extends RMOperationHandler {
- protected static final Logger log =
- LoggerFactory.getLogger(AsyncRMOperationHandler.class);
- private final AMRMClientAsync client;
+public class ProviderNotifyingOperationHandler extends RMOperationHandler {
+
+ final ProviderService providerService;
- public AsyncRMOperationHandler(AMRMClientAsync client) {
- this.client = client;
+ public ProviderNotifyingOperationHandler(ProviderService providerService) {
+ this.providerService = providerService;
}
@Override
public void releaseAssignedContainer(ContainerId containerId) {
- log.debug("Releasing container {}", containerId);
-
- client.releaseAssignedContainer(containerId);
+ providerService.releaseAssignedContainer(containerId);
}
@Override
public void addContainerRequest(AMRMClient.ContainerRequest req) {
- client.addContainerRequest(req);
+ providerService.addContainerRequest(req);
+
}
}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RMOperationHandler.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/RMOperationHandler.java
similarity index 75%
rename from slider-core/src/main/java/org/apache/slider/server/appmaster/state/RMOperationHandler.java
rename to slider-core/src/main/java/org/apache/slider/server/appmaster/operations/RMOperationHandler.java
index 4106b16..2b6e9e2 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RMOperationHandler.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/RMOperationHandler.java
@@ -16,19 +16,11 @@
* limitations under the License.
*/
-package org.apache.slider.server.appmaster.state;
-
-import org.apache.hadoop.yarn.api.records.ContainerId;
-import org.apache.hadoop.yarn.client.api.AMRMClient;
+package org.apache.slider.server.appmaster.operations;
import java.util.List;
-public abstract class RMOperationHandler {
-
-
- public abstract void releaseAssignedContainer(ContainerId containerId);
-
- public abstract void addContainerRequest(AMRMClient.ContainerRequest req);
+public abstract class RMOperationHandler implements RMOperationHandlerActions {
/**
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AbstractRMOperation.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/RMOperationHandlerActions.java
similarity index 70%
copy from slider-core/src/main/java/org/apache/slider/server/appmaster/state/AbstractRMOperation.java
copy to slider-core/src/main/java/org/apache/slider/server/appmaster/operations/RMOperationHandlerActions.java
index e3e595f..6659cc9 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AbstractRMOperation.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/RMOperationHandlerActions.java
@@ -16,16 +16,13 @@
* limitations under the License.
*/
-package org.apache.slider.server.appmaster.state;
+package org.apache.slider.server.appmaster.operations;
-public class AbstractRMOperation {
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.client.api.AMRMClient;
- /**
- * Execute the operation
- * @param asyncRMClient client
- */
- public void execute(RMOperationHandler handler) {
+public interface RMOperationHandlerActions {
+ void releaseAssignedContainer(ContainerId containerId);
- }
-
+ void addContainerRequest(AMRMClient.ContainerRequest req);
}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
index cc238ff..07976ef 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
@@ -19,6 +19,7 @@
package org.apache.slider.server.appmaster.state;
import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -35,7 +36,6 @@
import org.apache.slider.api.ClusterDescriptionKeys;
import org.apache.slider.api.ClusterDescriptionOperations;
import org.apache.slider.api.ClusterNode;
-import org.apache.slider.api.OptionKeys;
import org.apache.slider.api.ResourceKeys;
import org.apache.slider.api.RoleKeys;
import org.apache.slider.api.StatusKeys;
@@ -54,8 +54,10 @@
import org.apache.slider.core.exceptions.NoSuchNodeException;
import org.apache.slider.core.exceptions.SliderInternalStateException;
import org.apache.slider.core.exceptions.TriggerClusterTeardownException;
-import org.apache.slider.core.registry.docstore.PublishedConfigSet;
import org.apache.slider.providers.ProviderRole;
+import org.apache.slider.server.appmaster.operations.AbstractRMOperation;
+import org.apache.slider.server.appmaster.operations.ContainerReleaseOperation;
+import org.apache.slider.server.appmaster.operations.ContainerRequestOperation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -65,6 +67,7 @@
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
+import java.util.ListIterator;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
@@ -98,7 +101,7 @@
* Flag set to indicate the application is live -this only happens
* after the buildInstance operation
*/
- boolean applicationLive = false;
+ private boolean applicationLive = false;
/**
* The definition of the instance. Flexing updates the resources section
@@ -132,7 +135,7 @@
* Client properties created via the provider -static for the life
* of the application
*/
- private Map<String, String> clientProperties = new HashMap<>();
+ private Map<String, String> clientProperties = new HashMap<String, String>();
/**
The cluster description published to callers
@@ -143,10 +146,13 @@
private ClusterDescription clusterSpec = new ClusterDescription();
private final Map<Integer, RoleStatus> roleStatusMap =
- new ConcurrentHashMap<>();
+ new ConcurrentHashMap<Integer, RoleStatus>();
private final Map<String, ProviderRole> roles =
- new ConcurrentHashMap<>();
+ new ConcurrentHashMap<String, ProviderRole>();
+
+ private final Map<Integer, ProviderRole> rolePriorityMap =
+ new ConcurrentHashMap<Integer, ProviderRole>();
/**
* The master node.
@@ -157,8 +163,8 @@
* Hash map of the containers we have. This includes things that have
* been allocated but are not live; it is a superset of the live list
*/
- private final ConcurrentMap<ContainerId, RoleInstance> activeContainers =
- new ConcurrentHashMap<>();
+ private final ConcurrentMap<ContainerId, RoleInstance> ownedContainers =
+ new ConcurrentHashMap<ContainerId, RoleInstance>();
/**
* Hash map of the containers we have released, but we
@@ -166,7 +172,7 @@
* containers is treated as a successful outcome
*/
private final ConcurrentMap<ContainerId, Container> containersBeingReleased =
- new ConcurrentHashMap<>();
+ new ConcurrentHashMap<ContainerId, Container>();
/**
* Counter for completed containers ( complete denotes successful or failed )
@@ -201,34 +207,34 @@
* the node is promoted from here to the containerMap
*/
private final Map<ContainerId, RoleInstance> startingNodes =
- new ConcurrentHashMap<>();
+ new ConcurrentHashMap<ContainerId, RoleInstance>();
/**
* List of completed nodes. This isn't kept in the CD as it gets too
* big for the RPC responses. Indeed, we should think about how deep to get this
*/
private final Map<ContainerId, RoleInstance> completedNodes
- = new ConcurrentHashMap<>();
+ = new ConcurrentHashMap<ContainerId, RoleInstance>();
/**
* Nodes that failed to start.
* Again, kept out of the CD
*/
private final Map<ContainerId, RoleInstance> failedNodes =
- new ConcurrentHashMap<>();
+ new ConcurrentHashMap<ContainerId, RoleInstance>();
/**
* Nodes that came assigned to a role above that
* which were asked for -this appears to happen
*/
- private final Set<ContainerId> surplusNodes = new HashSet<>();
+ private final Set<ContainerId> surplusNodes = new HashSet<ContainerId>();
/**
* Map of containerID -> cluster nodes, for status reports.
* Access to this should be synchronized on the clusterDescription
*/
private final Map<ContainerId, RoleInstance> liveNodes =
- new ConcurrentHashMap<>();
+ new ConcurrentHashMap<ContainerId, RoleInstance>();
private final AtomicInteger completionOfNodeNotInLiveListEvent =
new AtomicInteger();
private final AtomicInteger completionOfUnknownContainerEvent =
@@ -250,6 +256,10 @@
private long startTimeThreshold;
private int failureThreshold = 10;
+
+ private String logServerURL = "";
+
+ private ContainerReleaseSelector containerReleaseSelector;
public AppState(AbstractRecordFactory recordFactory) {
this.recordFactory = recordFactory;
@@ -313,6 +323,10 @@
return roles;
}
+ public Map<Integer, ProviderRole> getRolePriorityMap() {
+ return rolePriorityMap;
+ }
+
private Map<ContainerId, RoleInstance> getStartingNodes() {
return startingNodes;
}
@@ -432,28 +446,34 @@
/**
* Build up the application state
* @param instanceDefinition definition of the applicatin instance
+ * @param appmasterConfig
* @param publishedProviderConf any configuration info to be published by a provider
* @param providerRoles roles offered by a provider
* @param fs filesystem
* @param historyDir directory containing history files
* @param liveContainers list of live containers supplied on an AM restart
* @param applicationInfo
+ * @param releaseSelector
*/
public synchronized void buildInstance(AggregateConf instanceDefinition,
- Configuration publishedProviderConf,
- List<ProviderRole> providerRoles,
- FileSystem fs,
- Path historyDir,
- List<Container> liveContainers,
- Map<String, String> applicationInfo) throws
- BadClusterStateException,
- BadConfigException,
- IOException {
- this.publishedProviderConf = publishedProviderConf;
- this.applicationInfo = applicationInfo != null ? applicationInfo
- : new HashMap<String, String>();
+ Configuration appmasterConfig,
+ Configuration publishedProviderConf,
+ List<ProviderRole> providerRoles,
+ FileSystem fs,
+ Path historyDir,
+ List<Container> liveContainers,
+ Map<String, String> applicationInfo,
+ SimpleReleaseSelector releaseSelector)
+ throws BadClusterStateException, BadConfigException, IOException {
+ Preconditions.checkArgument(instanceDefinition != null);
+ Preconditions.checkArgument(releaseSelector != null);
- clientProperties = new HashMap<>();
+ this.publishedProviderConf = publishedProviderConf;
+ this.applicationInfo = applicationInfo != null ? applicationInfo
+ : new HashMap<String, String>();
+
+ clientProperties = new HashMap<String, String>();
+ containerReleaseSelector = releaseSelector;
Set<String> confKeys = ConfigHelper.sortedConfigKeys(publishedProviderConf);
@@ -463,8 +483,8 @@
String val = publishedProviderConf.get(key);
clientProperties.put(key, val);
}
-
-
+
+
// set the cluster specification (once its dependency the client properties
// is out the way
@@ -477,15 +497,16 @@
}
ConfTreeOperations resources =
- instanceDefinition.getResourceOperations();
-
+ instanceDefinition.getResourceOperations();
+
Set<String> roleNames = resources.getComponentNames();
for (String name : roleNames) {
if (!roles.containsKey(name)) {
// this is a new value
log.info("Adding new role {}", name);
MapOperations resComponent = resources.getComponent(name);
- ProviderRole dynamicRole = createDynamicProviderRole(name, resComponent);
+ ProviderRole dynamicRole =
+ createDynamicProviderRole(name, resComponent);
buildRole(dynamicRole);
providerRoles.add(dynamicRole);
}
@@ -495,25 +516,29 @@
//set the livespan
- MapOperations globalInternalOpts =
- instanceDefinition.getInternalOperations().getGlobalOptions();
- startTimeThreshold = globalInternalOpts.getOptionInt(
- OptionKeys.INTERNAL_CONTAINER_FAILURE_SHORTLIFE,
- OptionKeys.DEFAULT_CONTAINER_FAILURE_SHORTLIFE);
+ MapOperations globalResOpts =
+ instanceDefinition.getResourceOperations().getGlobalOptions();
- failureThreshold = globalInternalOpts.getOptionInt(
- OptionKeys.INTERNAL_CONTAINER_FAILURE_THRESHOLD,
- OptionKeys.DEFAULT_CONTAINER_FAILURE_THRESHOLD);
+ startTimeThreshold = globalResOpts.getOptionInt(
+ ResourceKeys.CONTAINER_FAILURE_SHORTLIFE,
+ ResourceKeys.DEFAULT_CONTAINER_FAILURE_SHORTLIFE);
+
+ failureThreshold = globalResOpts.getOptionInt(
+ ResourceKeys.CONTAINER_FAILURE_THRESHOLD,
+ ResourceKeys.DEFAULT_CONTAINER_FAILURE_THRESHOLD);
initClusterStatus();
// add the roles
roleHistory = new RoleHistory(providerRoles);
roleHistory.onStart(fs, historyDir);
-
+
//rebuild any live containers
rebuildModelFromRestart(liveContainers);
-
+
+ // any am config options to pick up
+
+ logServerURL = appmasterConfig.get(YarnConfiguration.YARN_LOG_SERVER_URL, "");
//mark as live
applicationLive = true;
}
@@ -611,7 +636,7 @@
}
/**
- * The resource configuration is updated -review and update state
+ * The resource configuration is updated -review and update state.
* @param resources updated resources specification
*/
public synchronized void updateResourceDefinitions(ConfTree resources) throws
@@ -690,8 +715,9 @@
roleStatusMap.get(priority));
}
roleStatusMap.put(priority,
- new RoleStatus(providerRole));
+ new RoleStatus(providerRole));
roles.put(providerRole.name, providerRole);
+ rolePriorityMap.put(priority, providerRole);
}
/**
@@ -766,41 +792,111 @@
}
- public synchronized List<RoleInstance> cloneActiveContainerList() {
- Collection<RoleInstance> values = activeContainers.values();
- return new ArrayList<>(values);
+ /**
+ * Clone the list of active (==owned) containers
+ * @return the list of role instances representing all owned containers
+ */
+ public synchronized List<RoleInstance> cloneOwnedContainerList() {
+ Collection<RoleInstance> values = ownedContainers.values();
+ return new ArrayList<RoleInstance>(values);
+ }
+
+ /**
+ * Get the number of active (==owned) containers
+ * @return
+ */
+ public int getNumOwnedContainers() {
+ return ownedContainers.size();
}
-
- public int getNumActiveContainers() {
- return activeContainers.size();
- }
-
-
- public RoleInstance getActiveContainer(ContainerId id) {
- return activeContainers.get(id);
+ /**
+ * Look up an active container: any container that the AM has, even
+ * if it is not currently running/live
+ */
+ public RoleInstance getOwnedContainer(ContainerId id) {
+ return ownedContainers.get(id);
}
+ /**
+ * Remove an owned container
+ * @param id container ID
+ * @return the instance removed
+ */
+ private RoleInstance removeOwnedContainer(ContainerId id) {
+ return ownedContainers.remove(id);
+ }
+ /**
+ * set/update an owned container
+ * @param id container ID
+ * @param instance
+ * @return
+ */
+ private RoleInstance putOwnedContainer(ContainerId id,
+ RoleInstance instance) {
+ return ownedContainers.put(id, instance);
+ }
+
+ /**
+ * Clone the live container list. This is synchronized.
+ * @return a snapshot of the live node list
+ */
public synchronized List<RoleInstance> cloneLiveContainerInfoList() {
List<RoleInstance> allRoleInstances;
Collection<RoleInstance> values = getLiveNodes().values();
- allRoleInstances = new ArrayList<>(values);
+ allRoleInstances = new ArrayList<RoleInstance>(values);
return allRoleInstances;
}
-
-
+ /**
+ * Lookup live instance by string value of container ID
+ * @param containerId container ID as a string
+ * @return the role instance for that container
+ * @throws NoSuchNodeException if it does not exist
+ */
public synchronized RoleInstance getLiveInstanceByContainerID(String containerId)
- throws NoSuchNodeException {
+ throws NoSuchNodeException {
Collection<RoleInstance> nodes = getLiveNodes().values();
+ return findNodeInCollection(containerId, nodes);
+ }
+
+ /**
+ * Lookup owned instance by string value of container ID
+ * @param containerId container ID as a string
+ * @return the role instance for that container
+ * @throws NoSuchNodeException if it does not exist
+ */
+ public synchronized RoleInstance getOwnedInstanceByContainerID(String containerId)
+ throws NoSuchNodeException {
+ Collection<RoleInstance> nodes = ownedContainers.values();
+ return findNodeInCollection(containerId, nodes);
+ }
+
+
+
+ /**
+ * Iterate through a collection of role instances to find one with a
+ * specific (string) container ID
+ * @param containerId container ID as a string
+ * @param nodes collection
+ * @return
+ * @throws NoSuchNodeException if there was no match
+ */
+ private RoleInstance findNodeInCollection(String containerId,
+ Collection<RoleInstance> nodes) throws NoSuchNodeException {
+ RoleInstance found = null;
for (RoleInstance node : nodes) {
if (containerId.equals(node.id)) {
- return node;
+ found = node;
+ break;
}
}
- //at this point: no node
- throw new NoSuchNodeException(containerId);
+ if (found != null) {
+ return found;
+ } else {
+ //at this point: no node
+ throw new NoSuchNodeException(containerId);
+ }
}
@@ -808,7 +904,7 @@
Collection<String> containerIDs) {
//first, a hashmap of those containerIDs is built up
Set<String> uuidSet = new HashSet<String>(containerIDs);
- List<RoleInstance> nodes = new ArrayList<>(uuidSet.size());
+ List<RoleInstance> nodes = new ArrayList<RoleInstance>(uuidSet.size());
Collection<RoleInstance> clusterNodes = getLiveNodes().values();
for (RoleInstance node : clusterNodes) {
@@ -826,7 +922,7 @@
* @return a list of nodes, may be empty
*/
public synchronized List<RoleInstance> enumLiveNodesInRole(String role) {
- List<RoleInstance> nodes = new ArrayList<>();
+ List<RoleInstance> nodes = new ArrayList<RoleInstance>();
Collection<RoleInstance> allRoleInstances = getLiveNodes().values();
for (RoleInstance node : allRoleInstances) {
if (role.isEmpty() || role.equals(node.role)) {
@@ -836,17 +932,38 @@
return nodes;
}
+
+ /**
+ * enum nodes by role ID, from either the active or live node list
+ * @param roleId role the container must be in
+ * @param active flag to indicate "use active list" rather than the smaller
+ * "live" list
+ * @return a list of nodes, may be empty
+ */
+ public synchronized List<RoleInstance> enumNodesWithRoleId(int roleId,
+ boolean active) {
+ List<RoleInstance> nodes = new ArrayList<RoleInstance>();
+ Collection<RoleInstance> allRoleInstances;
+ allRoleInstances = active? ownedContainers.values() : liveNodes.values();
+ for (RoleInstance node : allRoleInstances) {
+ if (node.roleId == roleId) {
+ nodes.add(node);
+ }
+ }
+ return nodes;
+ }
+
/**
* Build an instance map.
* @return the map of Role name to list of role instances
*/
private synchronized Map<String, List<String>> createRoleToInstanceMap() {
- Map<String, List<String>> map = new HashMap<>();
+ Map<String, List<String>> map = new HashMap<String, List<String>>();
for (RoleInstance node : getLiveNodes().values()) {
List<String> containers = map.get(node.role);
if (containers == null) {
- containers = new ArrayList<>();
+ containers = new ArrayList<String>();
map.put(node.role, containers);
}
containers.add(node.id);
@@ -858,12 +975,12 @@
* @return the map of Role name to list of Cluster Nodes, ready
*/
private synchronized Map<String, Map<String, ClusterNode>> createRoleToClusterNodeMap() {
- Map<String, Map<String, ClusterNode>> map = new HashMap<>();
+ Map<String, Map<String, ClusterNode>> map = new HashMap<String, Map<String, ClusterNode>>();
for (RoleInstance node : getLiveNodes().values()) {
Map<String, ClusterNode> containers = map.get(node.role);
if (containers == null) {
- containers = new HashMap<>();
+ containers = new HashMap<String, ClusterNode>();
map.put(node.role, containers);
}
Messages.RoleInstanceState pbuf = node.toProtobuf();
@@ -885,7 +1002,7 @@
instance.container = container;
instance.createTime = now();
getStartingNodes().put(container.getId(), instance);
- activeContainers.put(container.getId(), instance);
+ putOwnedContainer(container.getId(), instance);
roleHistory.onContainerStartSubmitted(container, instance);
}
@@ -902,19 +1019,19 @@
throws SliderInternalStateException {
ContainerId id = container.getId();
//look up the container
- RoleInstance info = getActiveContainer(id);
- if (info == null) {
+ RoleInstance instance = getOwnedContainer(id);
+ if (instance == null) {
throw new SliderInternalStateException(
- "No active container with ID " + id.toString());
+ "No active container with ID " + id);
}
//verify that it isn't already released
if (containersBeingReleased.containsKey(id)) {
throw new SliderInternalStateException(
"Container %s already queued for release", id);
}
- info.released = true;
- containersBeingReleased.put(id, info.container);
- RoleStatus role = lookupRoleStatus(info.roleId);
+ instance.released = true;
+ containersBeingReleased.put(id, instance.container);
+ RoleStatus role = lookupRoleStatus(instance.roleId);
role.incReleasing();
roleHistory.onContainerReleaseSubmitted(container);
}
@@ -1052,7 +1169,7 @@
@VisibleForTesting
public RoleInstance innerOnNodeManagerContainerStarted(ContainerId containerId) {
incStartedCountainerCount();
- RoleInstance instance = activeContainers.get(containerId);
+ RoleInstance instance = getOwnedContainer(containerId);
if (instance == null) {
//serious problem
throw new YarnRuntimeException("Container not in active containers start "+
@@ -1088,17 +1205,20 @@
*/
public synchronized void onNodeManagerContainerStartFailed(ContainerId containerId,
Throwable thrown) {
- activeContainers.remove(containerId);
+ removeOwnedContainer(containerId);
incFailedCountainerCount();
incStartFailedCountainerCount();
RoleInstance instance = getStartingNodes().remove(containerId);
if (null != instance) {
RoleStatus roleStatus = lookupRoleStatus(instance.roleId);
+ String text;
if (null != thrown) {
- instance.diagnostics = SliderUtils.stringify(thrown);
+ text = SliderUtils.stringify(thrown);
+ } else {
+ text = "container start failure";
}
- roleStatus.noteFailed(null);
- roleStatus.incStartFailed();
+ instance.diagnostics = text;
+ roleStatus.noteFailed(true, null);
getFailedNodes().put(containerId, instance);
roleHistory.onNodeManagerContainerStartFailed(instance.container);
}
@@ -1161,31 +1281,23 @@
* @return NodeCompletionResult
*/
public synchronized NodeCompletionResult onCompletedNode(ContainerStatus status) {
- return onCompletedNode(null, status);
- }
-
- /**
- * handle completed node in the CD -move something from the live
- * server list to the completed server list
- * @param amConf YarnConfiguration
- * @param status the node that has just completed
- * @return NodeCompletionResult
- */
- public synchronized NodeCompletionResult onCompletedNode(Configuration amConf,
- ContainerStatus status) {
ContainerId containerId = status.getContainerId();
NodeCompletionResult result = new NodeCompletionResult();
RoleInstance roleInstance;
if (containersBeingReleased.containsKey(containerId)) {
- log.info("Container was queued for release");
+ log.info("Container was queued for release : {}", containerId);
Container container = containersBeingReleased.remove(containerId);
RoleStatus roleStatus = lookupRoleStatus(container);
- log.info("decrementing role count for role {}", roleStatus.getName());
- roleStatus.decReleasing();
- roleStatus.decActual();
- roleStatus.incCompleted();
- roleHistory.onReleaseCompleted(container);
+ int releasing = roleStatus.decReleasing();
+ int actual = roleStatus.decActual();
+ int completedCount = roleStatus.incCompleted();
+ log.info("decrementing role count for role {} to {}; releasing={}, completed={}",
+ roleStatus.getName(),
+ actual,
+ releasing,
+ completedCount);
+ roleHistory.onReleaseCompleted(container, true);
} else if (surplusNodes.remove(containerId)) {
//its a surplus one being purged
@@ -1193,7 +1305,7 @@
} else {
//a container has failed
result.containerFailed = true;
- roleInstance = activeContainers.remove(containerId);
+ roleInstance = removeOwnedContainer(containerId);
if (roleInstance != null) {
//it was active, move it to failed
incFailedCountainerCount();
@@ -1205,43 +1317,30 @@
}
if (roleInstance != null) {
int roleId = roleInstance.roleId;
- log.info("Failed container in role {}", roleId);
+ String rolename = roleInstance.role;
+ log.info("Failed container in role[{}] : {}", roleId, rolename);
try {
RoleStatus roleStatus = lookupRoleStatus(roleId);
roleStatus.decActual();
boolean shortLived = isShortLived(roleInstance);
String message;
- if (roleInstance.container != null) {
- String user = null;
- try {
- user = SliderUtils.getCurrentUser().getShortUserName();
- } catch (IOException ignored) {
- }
- String completedLogsUrl = null;
- Container c = roleInstance.container;
- String url = null;
- if (amConf != null) {
- url = amConf.get(YarnConfiguration.YARN_LOG_SERVER_URL);
- }
- if (user != null && url != null) {
- completedLogsUrl = url
- + "/" + c.getNodeId() + "/" + roleInstance.getContainerId() + "/ctx/" + user;
- }
- message = String.format("Failure %s on host %s" +
- (completedLogsUrl != null ? ", see %s" : ""), roleInstance.getContainerId(),
- c.getNodeId().getHost(), completedLogsUrl);
- } else {
- message = String.format("Failure %s",
- containerId.toString());
- }
- roleStatus.noteFailed(message);
- //have a look to see if it short lived
- if (shortLived) {
- roleStatus.incStartFailed();
- }
+ Container failedContainer = roleInstance.container;
- if (roleInstance.container != null) {
- roleHistory.onFailedContainer(roleInstance.container, shortLived);
+ //build the failure message
+ if (failedContainer != null) {
+ String completedLogsUrl = getLogsURLForContainer(failedContainer);
+ message = String.format("Failure %s on host %s: %s",
+ roleInstance.getContainerId().toString(),
+ failedContainer.getNodeId().getHost(),
+ completedLogsUrl);
+ } else {
+ message = String.format("Failure %s", containerId);
+ }
+ int failed = roleStatus.noteFailed(shortLived, message);
+ log.info("Current count of failed role[{}] {} = {}",
+ roleId, rolename, failed);
+ if (failedContainer != null) {
+ roleHistory.onFailedContainer(failedContainer, shortLived);
}
} catch (YarnRuntimeException e1) {
@@ -1255,30 +1354,67 @@
completionOfUnknownContainerEvent.incrementAndGet();
}
}
-
+
if (result.surplusNode) {
//a surplus node
return result;
}
-
+
//record the complete node's details; this pulls it from the livenode set
//remove the node
ContainerId id = status.getContainerId();
+ log.info("Removing node ID {}", id);
RoleInstance node = getLiveNodes().remove(id);
- if (node == null) {
- log.warn("Received notification of completion of unknown node {}", id);
- completionOfNodeNotInLiveListEvent.incrementAndGet();
-
- } else {
+ if (node != null) {
node.state = ClusterDescription.STATE_DESTROYED;
node.exitCode = status.getExitStatus();
node.diagnostics = status.getDiagnostics();
getCompletedNodes().put(id, node);
result.roleInstance = node;
+ } else {
+ // not in the list
+ log.warn("Received notification of completion of unknown node {}", id);
+ completionOfNodeNotInLiveListEvent.incrementAndGet();
+
}
+
+ // and the active node list if present
+ removeOwnedContainer(containerId);
+
+ // finally, verify the node doesn't exist any more
+ assert !containersBeingReleased.containsKey(
+ containerId) : "container still in release queue";
+ assert !getLiveNodes().containsKey(
+ containerId) : " container still in live nodes";
+ assert getOwnedContainer(containerId) ==
+ null : "Container still in active container list";
+
return result;
}
+ /**
+ * Get the URL log for a container
+ * @param c container
+ * @return the URL or "" if it cannot be determined
+ */
+ protected String getLogsURLForContainer(Container c) {
+ if (c==null) {
+ return null;
+ }
+ String user = null;
+ try {
+ user = SliderUtils.getCurrentUser().getShortUserName();
+ } catch (IOException ignored) {
+ }
+ String completedLogsUrl = "";
+ String url = logServerURL;
+ if (user != null && SliderUtils.isSet(url)) {
+ completedLogsUrl = url
+ + "/" + c.getNodeId() + "/" + c.getId() + "/ctx/" + user;
+ }
+ return completedLogsUrl;
+ }
+
/**
* Return the percentage done that Slider is to have YARN display in its
@@ -1310,7 +1446,7 @@
* Update the cluster description with anything interesting
* @param providerStatus status from the provider for the cluster info section
*/
- public void refreshClusterStatus(Map<String, String> providerStatus) {
+ public synchronized ClusterDescription refreshClusterStatus(Map<String, String> providerStatus) {
ClusterDescription cd = getClusterStatus();
long now = now();
cd.setInfoTime(StatusKeys.INFO_STATUS_TIME_HUMAN,
@@ -1321,10 +1457,10 @@
cd.setInfo(entry.getKey(),entry.getValue());
}
}
- MapOperations infoOps = new MapOperations("info",cd.info);
+ MapOperations infoOps = new MapOperations("info", cd.info);
infoOps.mergeWithoutOverwrite(applicationInfo);
SliderUtils.addBuildInfo(infoOps, "status");
- cd.statistics = new HashMap<>();
+ cd.statistics = new HashMap<String, Map<String, Integer>>();
// build the map of node -> container IDs
Map<String, List<String>> instanceMap = createRoleToInstanceMap();
@@ -1333,7 +1469,7 @@
//build the map of node -> containers
Map<String, Map<String, ClusterNode>> clusterNodes =
createRoleToClusterNodeMap();
- cd.status = new HashMap<>();
+ cd.status = new HashMap<String, Object>();
cd.status.put(ClusterDescriptionKeys.KEY_CLUSTER_LIVE, clusterNodes);
@@ -1352,7 +1488,7 @@
cd.statistics.put(rolename, stats);
}
- Map<String, Integer> sliderstats = new HashMap<>();
+ Map<String, Integer> sliderstats = new HashMap<String, Integer>();
sliderstats.put(StatusKeys.STATISTICS_CONTAINERS_COMPLETED,
completedContainerCount.get());
sliderstats.put(StatusKeys.STATISTICS_CONTAINERS_FAILED,
@@ -1367,7 +1503,7 @@
sliderstats.put(StatusKeys.STATISTICS_CONTAINERS_UNKNOWN_COMPLETED,
completionOfUnknownContainerEvent.get());
cd.statistics.put(SliderKeys.COMPONENT_AM, sliderstats);
-
+ return cd;
}
/**
@@ -1376,7 +1512,7 @@
public synchronized List<AbstractRMOperation> reviewRequestAndReleaseNodes()
throws SliderInternalStateException, TriggerClusterTeardownException {
log.debug("in reviewRequestAndReleaseNodes()");
- List<AbstractRMOperation> allOperations = new ArrayList<>();
+ List<AbstractRMOperation> allOperations = new ArrayList<AbstractRMOperation>();
for (RoleStatus roleStatus : getRoleStatusMap().values()) {
if (!roleStatus.getExcludeFromFlexing()) {
List<AbstractRMOperation> operations = reviewOneRole(roleStatus);
@@ -1385,23 +1521,58 @@
}
return allOperations;
}
-
- public void checkFailureThreshold(RoleStatus role) throws
- TriggerClusterTeardownException {
- int failures = role.getFailed();
- if (failures > failureThreshold) {
+ /**
+ * Check the failure threshold for a role
+ * @param role role to examine
+ * @throws TriggerClusterTeardownException if the role
+ * has failed too many times
+ */
+ private void checkFailureThreshold(RoleStatus role)
+ throws TriggerClusterTeardownException {
+ int failures = role.getFailed();
+ int threshold = getFailureThresholdForRole(role);
+ log.debug("Failure count of role: {}: {}, threshold={}",
+ role.getName(), failures, threshold);
+
+ if (failures > threshold) {
throw new TriggerClusterTeardownException(
SliderExitCodes.EXIT_DEPLOYMENT_FAILED,
ErrorStrings.E_UNSTABLE_CLUSTER +
- " - failed with role %s failing %d times (%d in startup); threshold is %d - last failure: %s",
+ " - failed with role %s failing %d times (%d in startup);" +
+ " threshold is %d - last failure: %s",
role.getName(),
role.getFailed(),
role.getStartFailed(),
- failureThreshold,
+ threshold,
role.getFailureMessage());
}
}
+
+ /**
+ * Get the failure threshold for a specific role, falling back to
+ * the global one if not
+ * @param roleStatus
+ * @return the threshold for failures
+ */
+ private int getFailureThresholdForRole(RoleStatus roleStatus) {
+ ConfTreeOperations resources =
+ instanceDefinition.getResourceOperations();
+ return resources.getComponentOptInt(roleStatus.getName(),
+ ResourceKeys.CONTAINER_FAILURE_SHORTLIFE,
+ failureThreshold);
+ }
+
+ /**
+ * Reset the failure counts of all roles
+ */
+ public void resetFailureCounts() {
+ for (RoleStatus roleStatus : getRoleStatusMap().values()) {
+ int failed = roleStatus.resetFailed();
+ log.debug("Resetting failure count of {}; was {}", roleStatus.getName(),
+ failed);
+ }
+ }
/**
* Look at the allocation status of one role, and trigger add/release
@@ -1412,9 +1583,10 @@
* @throws SliderInternalStateException if the operation reveals that
* the internal state of the application is inconsistent.
*/
- public List<AbstractRMOperation> reviewOneRole(RoleStatus role)
+ @SuppressWarnings("SynchronizationOnLocalVariableOrMethodParameter")
+ private List<AbstractRMOperation> reviewOneRole(RoleStatus role)
throws SliderInternalStateException, TriggerClusterTeardownException {
- List<AbstractRMOperation> operations = new ArrayList<>();
+ List<AbstractRMOperation> operations = new ArrayList<AbstractRMOperation>();
int delta;
String details;
int expected;
@@ -1457,18 +1629,43 @@
// get the nodes to release
int roleId = role.getKey();
- List<NodeInstance> nodesForRelease =
- roleHistory.findNodesForRelease(roleId, excess);
-
- for (NodeInstance node : nodesForRelease) {
- RoleInstance possible = findRoleInstanceOnHost(node, roleId);
- if (possible == null) {
- throw new SliderInternalStateException(
- "Failed to find a container to release on node %s", node.hostname);
- }
- containerReleaseSubmitted(possible.container);
- operations.add(new ContainerReleaseOperation(possible.getId()));
+
+ // enum all active nodes that aren't being released
+ List<RoleInstance> containersToRelease = enumNodesWithRoleId(roleId, true);
+ // cut all release-in-progress nodes
+ ListIterator<RoleInstance> li = containersToRelease.listIterator();
+ while (li.hasNext()) {
+ RoleInstance next = li.next();
+ if (next.released) {
+ li.remove();
+ }
+ }
+
+ // warn if the desired state can't be reaced
+ if (containersToRelease.size() < excess) {
+ log.warn("Not enough nodes to release...short of {} nodes",
+ containersToRelease.size() - excess);
+ }
+
+ // ask the release selector to sort the targets
+ containersToRelease = containerReleaseSelector.sortCandidates(
+ roleId,
+ containersToRelease,
+ excess);
+
+ //crop to the excess
+
+ List<RoleInstance> finalCandidates = (excess < containersToRelease.size())
+ ? containersToRelease.subList(0, excess)
+ : containersToRelease;
+
+
+ // then build up a release operation, logging each container as released
+ for (RoleInstance possible : finalCandidates) {
+ log.debug("Targeting for release: {}", possible);
+ containerReleaseSubmitted(possible.container);
+ operations.add(new ContainerReleaseOperation(possible.getId()));
}
}
@@ -1482,12 +1679,12 @@
* @return
* @throws SliderInternalStateException
*/
- public List<AbstractRMOperation> releaseContainer(String containerId)
+ public List<AbstractRMOperation> releaseContainer(ContainerId containerId)
throws SliderInternalStateException {
- List<AbstractRMOperation> operations = new ArrayList<>();
- List<RoleInstance> activeRoleInstances = cloneActiveContainerList();
+ List<AbstractRMOperation> operations = new ArrayList<AbstractRMOperation>();
+ List<RoleInstance> activeRoleInstances = cloneOwnedContainerList();
for (RoleInstance role : activeRoleInstances) {
- if (role.container.getId().toString().equals(containerId)) {
+ if (role.container.getId().equals(containerId)) {
containerReleaseSubmitted(role.container);
operations.add(new ContainerReleaseOperation(role.getId()));
}
@@ -1507,7 +1704,7 @@
* that can be released.
*/
private RoleInstance findRoleInstanceOnHost(NodeInstance node, int roleId) {
- Collection<RoleInstance> targets = cloneActiveContainerList();
+ Collection<RoleInstance> targets = cloneOwnedContainerList();
String hostname = node.hostname;
for (RoleInstance ri : targets) {
if (hostname.equals(RoleHistoryUtils.hostnameOf(ri.container))
@@ -1525,14 +1722,16 @@
*/
public synchronized List<AbstractRMOperation> releaseAllContainers() {
- Collection<RoleInstance> targets = cloneActiveContainerList();
+ Collection<RoleInstance> targets = cloneOwnedContainerList();
log.info("Releasing {} containers", targets.size());
List<AbstractRMOperation> operations =
- new ArrayList<>(targets.size());
+ new ArrayList<AbstractRMOperation>(targets.size());
for (RoleInstance instance : targets) {
Container possible = instance.container;
ContainerId id = possible.getId();
if (!instance.released) {
+ String url = getLogsURLForContainer(possible);
+ log.info("Releasing container. Log: " + url);
try {
containerReleaseSubmitted(possible);
} catch (SliderInternalStateException e) {
@@ -1677,7 +1876,7 @@
instance.container = container;
instance.createTime = now();
instance.state = ClusterDescription.STATE_LIVE;
- activeContainers.put(cid, instance);
+ putOwnedContainer(cid, instance);
//role history gets told
roleHistory.onContainerAssigned(container);
// pretend the container has just had its start actions submitted
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AbstractRMOperation.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/ContainerReleaseSelector.java
similarity index 64%
copy from slider-core/src/main/java/org/apache/slider/server/appmaster/state/AbstractRMOperation.java
copy to slider-core/src/main/java/org/apache/slider/server/appmaster/state/ContainerReleaseSelector.java
index e3e595f..0cbc134 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AbstractRMOperation.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/ContainerReleaseSelector.java
@@ -18,14 +18,21 @@
package org.apache.slider.server.appmaster.state;
-public class AbstractRMOperation {
+import java.util.List;
+
+/**
+ * Interface implemented by anything that must choose containers to release
+ *
+ */
+public interface ContainerReleaseSelector {
/**
- * Execute the operation
- * @param asyncRMClient client
+ * Given a list of candidate containers, return a sorted version of the priority
+ * in which they should be released.
+ * @param candidates candidate list ... everything considered suitable
+ * @return
*/
- public void execute(RMOperationHandler handler) {
-
- }
-
+ List<RoleInstance> sortCandidates(int roleId,
+ List<RoleInstance> candidates,
+ int minimumToSelect);
}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/MostRecentContainerReleaseSelector.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/MostRecentContainerReleaseSelector.java
new file mode 100644
index 0000000..841dda3
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/MostRecentContainerReleaseSelector.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.state;
+
+import org.apache.slider.common.tools.Comparators;
+
+import java.io.Serializable;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+
+/**
+ * Sort the candidate list by the most recent container first.
+ */
+public class MostRecentContainerReleaseSelector implements ContainerReleaseSelector {
+
+ @Override
+ public List<RoleInstance> sortCandidates(int roleId,
+ List<RoleInstance> candidates,
+ int minimumToSelect) {
+ Collections.sort(candidates, new newerThan());
+ return candidates;
+ }
+
+ private static class newerThan implements Comparator<RoleInstance>, Serializable {
+ private final Comparator<Long> innerComparator =
+ new Comparators.ComparatorReverser<Long>(new Comparators.LongComparator());
+ public int compare(RoleInstance o1, RoleInstance o2) {
+ return innerComparator.compare(o1.createTime, o2.createTime);
+
+ }
+
+ }
+
+
+}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeEntry.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeEntry.java
index a9e5a8c..c8ab2a7 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeEntry.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeEntry.java
@@ -169,7 +169,6 @@
* Release an instance -which is no longer marked as active
*/
public synchronized void release() {
- assert live > 0 : "no live nodes to release";
releasing++;
}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java
index 06375fb..1ba2282 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java
@@ -41,7 +41,7 @@
*/
public NodeInstance(String hostname, int roles) {
this.hostname = hostname;
- nodeEntries = new ArrayList<>(roles);
+ nodeEntries = new ArrayList<NodeEntry>(roles);
}
/**
@@ -75,7 +75,7 @@
}
/**
- * Cout the number of active role instances on this node
+ * Count the number of active role instances on this node
* @param role role index
* @return 0 if there are none, otherwise the #of nodes that are running and
* not being released already.
@@ -84,6 +84,16 @@
NodeEntry nodeEntry = get(role);
return (nodeEntry != null ) ? nodeEntry.getActive() : 0;
}
+
+ /**
+ * Count the number of live role instances on this node
+ * @param role role index
+ * @return 0 if there are none, otherwise the #of nodes that are running
+ */
+ public int getLiveRoleInstances(int role) {
+ NodeEntry nodeEntry = get(role);
+ return (nodeEntry != null ) ? nodeEntry.getLive() : 0;
+ }
/**
* Get the entry for a role -and remove it if present
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeMap.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeMap.java
index 32b1656..570c194 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeMap.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeMap.java
@@ -73,7 +73,7 @@
* in that role
*/
public List<NodeInstance> listActiveNodes(int role) {
- List<NodeInstance> nodes = new ArrayList<>();
+ List<NodeInstance> nodes = new ArrayList<NodeInstance>();
for (NodeInstance instance : values()) {
if (instance.getActiveRoleInstances(role) > 0) {
nodes.add(instance);
@@ -102,60 +102,6 @@
}
return purged;
}
-
-
-
- /**
- * Find a list of node for release; algorithm may make its own
- * decisions on which to release.
- * @param role role index
- * @param count number of nodes to release
- * @return a possibly empty list of nodes.
- */
- public List<NodeInstance> findNodesForRelease(int role, int count) {
- List<NodeInstance> targets = new ArrayList<>(count);
- List<NodeInstance> active = listActiveNodes(role);
- List<NodeInstance> multiple = new ArrayList<>();
- int nodesRemaining = count;
- log.debug("searching for {} nodes with candidate set size {}",
- nodesRemaining, active.size());
- ListIterator<NodeInstance> it = active.listIterator();
- while (it.hasNext() && nodesRemaining > 0) {
- NodeInstance ni = it.next();
- int load = ni.getActiveRoleInstances(role);
- log.debug("Node {} load={}", ni, load);
- assert load != 0;
- if (load == 1) {
- // at the tail of the list, from here active[*] is a load=1 entry
- break;
- }
- // load is >1. Add an entry to the target list FOR EACH INSTANCE ABOVE 1
- for (int i = 0; i < (load - 1) && nodesRemaining > 0; i++) {
- nodesRemaining--;
- log.debug("Push {} #{}", ni, i);
- targets.add(ni);
- }
- // and add to the multiple list
- multiple.add(ni);
- // then pop it from the active list
- it.remove();
- }
- //here either the number is found or there is still some left.
-
- if (nodesRemaining > 0) {
- // leftovers. Append any of the multiple node entries to the tail of
- // the active list (so they get chosen last)
- active.addAll(multiple);
- // all the entries in the list have exactly one node
- // so ask for as many as are needed
- int ask = Math.min(nodesRemaining, active.size());
- log.debug("load=1 nodes to select={} multiples={} available={} ask={}",
- nodesRemaining, multiple.size(),active.size(), ask);
- targets.addAll(active.subList(0, ask));
- }
- return targets;
- }
-
/**
* Clone point
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequestTracker.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequestTracker.java
index fa2c754..d847962 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequestTracker.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequestTracker.java
@@ -43,7 +43,7 @@
LoggerFactory.getLogger(OutstandingRequestTracker.class);
private Map<OutstandingRequest, OutstandingRequest> requests =
- new HashMap<>();
+ new HashMap<OutstandingRequest, OutstandingRequest>();
/**
* Create a new request for the specific role. If a
@@ -161,7 +161,7 @@
* @return possibly empty list of hostnames
*/
public synchronized List<NodeInstance> cancelOutstandingRequests(int role) {
- List<NodeInstance> hosts = new ArrayList<>();
+ List<NodeInstance> hosts = new ArrayList<NodeInstance>();
Iterator<Map.Entry<OutstandingRequest,OutstandingRequest>> iterator =
requests.entrySet().iterator();
while (iterator.hasNext()) {
@@ -178,6 +178,6 @@
}
public synchronized List<OutstandingRequest> listOutstandingRequests() {
- return new ArrayList<>(requests.values());
+ return new ArrayList<OutstandingRequest>(requests.values());
}
}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/ProviderAppState.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/ProviderAppState.java
index 6caf1a9..a0871ae 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/ProviderAppState.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/ProviderAppState.java
@@ -39,7 +39,7 @@
private final Map<String, PublishedConfigSet> publishedConfigSets =
- new ConcurrentHashMap<>(5);
+ new ConcurrentHashMap<String, PublishedConfigSet>(5);
private static final PatternValidator validator = new PatternValidator(
RestPaths.PUBLISHED_CONFIGURATION_SET_REGEXP);
private String applicationName;
@@ -92,7 +92,7 @@
public List<String> listConfigSets() {
synchronized (publishedConfigSets) {
- List<String> sets = new ArrayList<>(publishedConfigSets.keySet());
+ List<String> sets = new ArrayList<String>(publishedConfigSets.keySet());
return sets;
}
}
@@ -164,18 +164,23 @@
}
@Override
- public List<RoleInstance> cloneActiveContainerList() {
- return appState.cloneActiveContainerList();
+ public List<RoleInstance> cloneOwnedContainerList() {
+ return appState.cloneOwnedContainerList();
}
@Override
- public int getNumActiveContainers() {
- return appState.getNumActiveContainers();
+ public int getNumOwnedContainers() {
+ return appState.getNumOwnedContainers();
}
@Override
- public RoleInstance getActiveContainer(ContainerId id) {
- return appState.getActiveContainer(id);
+ public RoleInstance getOwnedContainer(ContainerId id) {
+ return appState.getOwnedContainer(id);
+ }
+
+ @Override
+ public RoleInstance getOwnedContainer(String id) throws NoSuchNodeException {
+ return appState.getOwnedInstanceByContainerID(id);
}
@Override
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
index 0cd2b39..edcf7ea 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
@@ -29,7 +29,6 @@
import org.apache.slider.providers.ProviderRole;
import org.apache.slider.server.avro.RoleHistoryHeader;
import org.apache.slider.server.avro.RoleHistoryWriter;
-import org.mortbay.log.Log;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -61,7 +60,7 @@
LoggerFactory.getLogger(RoleHistory.class);
private final List<ProviderRole> providerRoles;
private final Map<String, ProviderRole> providerRoleMap =
- new HashMap<>();
+ new HashMap<String, ProviderRole>();
private long startTime;
/**
* Time when saved
@@ -108,9 +107,8 @@
nodemap = new NodeMap(roleSize);
resetAvailableNodeLists();
- resetAvailableNodeLists();
outstandingRequests = new OutstandingRequestTracker();
- Map<Integer, RoleStatus> roleStats = new HashMap<>();
+ Map<Integer, RoleStatus> roleStats = new HashMap<Integer, RoleStatus>();
for (ProviderRole providerRole : providerRoles) {
@@ -142,7 +140,7 @@
*/
public void addNewProviderRole(ProviderRole providerRole)
throws BadConfigException {
- Map<Integer, RoleStatus> roleStats = new HashMap<>();
+ Map<Integer, RoleStatus> roleStats = new HashMap<Integer, RoleStatus>();
for (ProviderRole role : providerRoles) {
@@ -156,7 +154,7 @@
* Clear the lists of available nodes
*/
private synchronized void resetAvailableNodeLists() {
- availableNodes = new HashMap<>(roleSize);
+ availableNodes = new HashMap<Integer, LinkedList<NodeInstance>>(roleSize);
}
/**
@@ -434,7 +432,7 @@
private LinkedList<NodeInstance> getOrCreateNodesForRoleId(int id) {
LinkedList<NodeInstance> instances = availableNodes.get(id);
if (instances == null) {
- instances = new LinkedList<>();
+ instances = new LinkedList<NodeInstance>();
availableNodes.put(id, instances);
}
return instances;
@@ -479,7 +477,7 @@
}
}
if (nodeInstance == null) {
- log.debug("No node selected for {}", role.getName());
+ log.debug("No historical node found for {}", role.getName());
}
return nodeInstance;
}
@@ -518,18 +516,16 @@
return requestInstanceOnNode(node, role, resource);
}
-
/**
- * Find a list of node for release; algorithm may make its own
- * decisions on which to release.
+ * Get the list of active nodes ... walks the node map so
+ * is O(nodes)
* @param role role index
- * @param count number of nodes to release
- * @return a possibly empty list of nodes.
+ * @return a possibly empty list of nodes with an instance of that node
*/
- public synchronized List<NodeInstance> findNodesForRelease(int role, int count) {
- return nodemap.findNodesForRelease(role, count);
+ public synchronized List<NodeInstance> listActiveNodes(int role) {
+ return nodemap.listActiveNodes(role);
}
-
+
/**
* Get the node entry of a container
* @param container container to look up
@@ -572,7 +568,7 @@
List<Container> requested =
new ArrayList<Container>(allocatedContainers.size());
List<Container> unrequested =
- new ArrayList<>(allocatedContainers.size());
+ new ArrayList<Container>(allocatedContainers.size());
outstandingRequests.partitionRequests(this, allocatedContainers, requested, unrequested);
//give the unrequested ones lower priority
@@ -659,10 +655,11 @@
/**
* App state notified of a container completed
* @param container completed container
+ * @param wasReleased
* @return true if the node was queued
*/
- public boolean onReleaseCompleted(Container container) {
- return markContainerFinished(container, true, false);
+ public boolean onReleaseCompleted(Container container, boolean wasReleased) {
+ return markContainerFinished(container, wasReleased, false);
}
/**
@@ -750,7 +747,7 @@
*/
@VisibleForTesting
public List<NodeInstance> cloneAvailableList(int role) {
- return new LinkedList<>(getOrCreateNodesForRoleId(role));
+ return new LinkedList<NodeInstance>(getOrCreateNodesForRoleId(role));
}
/**
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleInstance.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleInstance.java
index 205edea..e373843 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleInstance.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleInstance.java
@@ -25,8 +25,12 @@
import org.apache.slider.api.ClusterDescription;
import org.apache.slider.api.proto.Messages;
import org.apache.slider.common.tools.SliderUtils;
+import org.apache.slider.core.registry.info.RegisteredEndpoint;
+import java.net.InetSocketAddress;
+import java.util.ArrayList;
import java.util.Arrays;
+import java.util.List;
/**
* Tracking information about a container
@@ -82,11 +86,12 @@
public String host;
public String hostURL;
+
/**
- * Any information the provider wishes to retain on the state of
- * an instance
+ * A list of registered endpoints.
*/
- public Object providerInfo;
+ private List<RegisteredEndpoint> endpoints =
+ new ArrayList<RegisteredEndpoint>(2);
public RoleInstance(Container container) {
Preconditions.checkNotNull(container, "Null container");
@@ -115,12 +120,12 @@
public String toString() {
final StringBuilder sb =
new StringBuilder("RoleInstance{");
- sb.append("container=").append(SliderUtils.containerToString(container));
+ sb.append("role='").append(role).append('\'');
sb.append(", id='").append(id).append('\'');
+ sb.append(", container=").append(SliderUtils.containerToString(container));
sb.append(", createTime=").append(createTime);
sb.append(", startTime=").append(startTime);
sb.append(", released=").append(released);
- sb.append(", role='").append(role).append('\'');
sb.append(", roleId=").append(roleId);
sb.append(", host=").append(host);
sb.append(", hostURL=").append(hostURL);
@@ -188,8 +193,39 @@
@Override
public Object clone() throws CloneNotSupportedException {
RoleInstance cloned = (RoleInstance) super.clone();
+ // clone the endpoint list, but not the values
+ cloned.endpoints = new ArrayList<RegisteredEndpoint>(this.endpoints);
return cloned;
}
+ /**
+ * Get the list of endpoints.
+ * @return the endpoint list.
+ */
+ public List<RegisteredEndpoint> getEndpoints() {
+ return endpoints;
+ }
+ /**
+ * Add an endpoint registration
+ * @param endpoint
+ */
+ public void addEndpoint(RegisteredEndpoint endpoint) {
+ Preconditions.checkArgument(endpoint != null);
+ endpoints.add(endpoint);
+ }
+
+ /**
+ * Register a port endpoint as an inet-addr formatted endpoint, using the
+ * hostname as the first part of the address
+ * @param port
+ * @param protocol
+ * @param text
+ */
+ public void registerPortEndpoint(int port, String protocol, String text) {
+ InetSocketAddress addr = new InetSocketAddress(host, port);
+ RegisteredEndpoint epr = new RegisteredEndpoint(addr, protocol, text);
+ addEndpoint(epr);
+ }
+
}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleStatus.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleStatus.java
index 04d8b37..df4ab8e 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleStatus.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleStatus.java
@@ -45,7 +45,7 @@
private final ProviderRole providerRole;
private int desired, actual, requested, releasing;
- private int failed, started, startFailed, completed, totalRequested;
+ private volatile int failed, started, startFailed, completed, totalRequested;
private String failureMessage = "";
@@ -143,16 +143,34 @@
}
/**
+ * Reset the failure counts
+ * @return the total number of failures up to this point
+ */
+ public int resetFailed() {
+ int total = failed + startFailed;
+ failed = 0;
+ startFailed = 0;
+ return total;
+ }
+
+ /**
* Note that a role failed, text will
* be used in any diagnostics if an exception
* is later raised.
+ * @param startupFailure flag to indicate this was a startup event
+ * @return the number of failures
* @param text text about the failure
*/
- public void noteFailed(String text) {
- failed++;
+ public int noteFailed(boolean startupFailure, String text) {
+ int current = ++failed;
if (text != null) {
failureMessage = text;
}
+ //have a look to see if it short lived
+ if (startupFailure) {
+ incStartFailed();
+ }
+ return current;
}
public int getStartFailed() {
@@ -175,8 +193,8 @@
this.completed = completed;
}
- public void incCompleted() {
- completed ++;
+ public int incCompleted() {
+ return completed ++;
}
public int getStarted() {
return started;
@@ -246,7 +264,7 @@
* @return a map for use in statistics reports
*/
public Map<String, Integer> buildStatistics() {
- Map<String, Integer> stats = new HashMap<>();
+ Map<String, Integer> stats = new HashMap<String, Integer>();
stats.put(StatusKeys.STATISTICS_CONTAINERS_ACTIVE_REQUESTS, getRequested());
stats.put(StatusKeys.STATISTICS_CONTAINERS_COMPLETED, getCompleted());
stats.put(StatusKeys.STATISTICS_CONTAINERS_DESIRED, getDesired());
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AbstractRMOperation.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/SimpleReleaseSelector.java
similarity index 73%
copy from slider-core/src/main/java/org/apache/slider/server/appmaster/state/AbstractRMOperation.java
copy to slider-core/src/main/java/org/apache/slider/server/appmaster/state/SimpleReleaseSelector.java
index e3e595f..b7f0e05 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AbstractRMOperation.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/SimpleReleaseSelector.java
@@ -18,14 +18,17 @@
package org.apache.slider.server.appmaster.state;
-public class AbstractRMOperation {
+import java.util.List;
- /**
- * Execute the operation
- * @param asyncRMClient client
- */
- public void execute(RMOperationHandler handler) {
+/**
+ * Simplest release selector simply returns the list
+ */
+public class SimpleReleaseSelector implements ContainerReleaseSelector {
+ @Override
+ public List<RoleInstance> sortCandidates(int roleId,
+ List<RoleInstance> candidates,
+ int minimumToSelect) {
+ return candidates;
}
-
}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/StateAccessForProviders.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/StateAccessForProviders.java
index acba8cc..1714f75 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/StateAccessForProviders.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/StateAccessForProviders.java
@@ -144,20 +144,27 @@
* @return the active containers at the time
* the call was made
*/
- List<RoleInstance> cloneActiveContainerList();
+ List<RoleInstance> cloneOwnedContainerList();
/**
* Get the number of active containers
* @return the number of active containers the time the call was made
*/
- int getNumActiveContainers();
+ int getNumOwnedContainers();
/**
* Get any active container with the given ID
* @param id container Id
* @return the active container or null if it is not found
*/
- RoleInstance getActiveContainer(ContainerId id);
+ RoleInstance getOwnedContainer(ContainerId id);
+
+ /**
+ * Get any active container with the given ID
+ * @param id container Id
+ * @return the active container or null if it is not found
+ */
+ RoleInstance getOwnedContainer(String id) throws NoSuchNodeException;
/**
* Create a clone of the list of live cluster nodes.
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/SliderAMWebApp.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/SliderAMWebApp.java
index 4f290af..9192efe 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/SliderAMWebApp.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/SliderAMWebApp.java
@@ -96,7 +96,7 @@
String regex = "(?!/ws)";
serveRegex(regex).with(SliderDefaultWrapperServlet.class);
- Map<String, String> params = new HashMap<>();
+ Map<String, String> params = new HashMap<String, String>();
params.put(ResourceConfig.FEATURE_IMPLICIT_VIEWABLES, "true");
params.put(ServletContainer.FEATURE_FILTER_FORWARD_ON_404, "true");
params.put(ResourceConfig.FEATURE_XMLROOTELEMENT_PROCESSING, "true");
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/SliderAmFilterInitializer.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/SliderAmFilterInitializer.java
index 606c05d..5fffa4a 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/SliderAmFilterInitializer.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/SliderAmFilterInitializer.java
@@ -43,7 +43,7 @@
@Override
public void initFilter(FilterContainer container, Configuration conf) {
configuration = conf;
- Map<String, String> params = new HashMap<>();
+ Map<String, String> params = new HashMap<String, String>();
String proxy = WebAppUtils.getProxyHostAndPort(conf);
String[] parts = proxy.split(":");
params.put(SliderAmIpFilter.PROXY_HOST, parts[0]);
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/SliderAmIpFilter.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/SliderAmIpFilter.java
index ad5e219..4c66876 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/SliderAmIpFilter.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/SliderAmIpFilter.java
@@ -70,7 +70,7 @@
synchronized(this) {
if(proxyAddresses == null || (lastUpdate + updateInterval) >= now) {
try {
- proxyAddresses = new HashSet<>();
+ proxyAddresses = new HashSet<String>();
for(InetAddress add : InetAddress.getAllByName(proxyHost)) {
if (log.isDebugEnabled()) {
log.debug("proxy address is: " + add.getHostAddress());
@@ -133,7 +133,11 @@
principal);
chain.doFilter(requestWrapper, resp);
}
- } catch (IOException | ServletException e) {
+// JKD7 } catch (IOException | ServletException e) {
+ } catch (IOException e) {
+ log.warn("When fetching {}: {}", requestURI, e);
+ throw e;
+ } catch (ServletException e) {
log.warn("When fetching {}: {}", requestURI, e);
throw e;
}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/WebAppApiImpl.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/WebAppApiImpl.java
index 4eebd45..4d595a9 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/WebAppApiImpl.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/WebAppApiImpl.java
@@ -126,7 +126,7 @@
*/
private TreeMap<String, RoleStatus> getRoleStatusesByName(Map<Integer, ProviderRole> rolesById,
Map<Integer, RoleStatus> statusById) {
- TreeMap<String, RoleStatus> statusByName = new TreeMap<>();
+ TreeMap<String, RoleStatus> statusByName = new TreeMap<String, RoleStatus>();
for (Entry<Integer, ProviderRole> role : rolesById.entrySet()) {
final RoleStatus status = statusById.get(role.getKey());
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/agent/HeartBeat.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/agent/HeartBeat.java
index d3388f5..a08d46d 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/agent/HeartBeat.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/agent/HeartBeat.java
@@ -18,6 +18,7 @@
package org.apache.slider.server.appmaster.web.rest.agent;
+import org.apache.slider.providers.agent.State;
import org.codehaus.jackson.annotate.JsonIgnoreProperties;
import org.codehaus.jackson.annotate.JsonProperty;
import org.codehaus.jackson.map.annotate.JsonSerialize;
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/agent/HeartBeatResponse.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/agent/HeartBeatResponse.java
index ca2db32..0545499 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/agent/HeartBeatResponse.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/agent/HeartBeatResponse.java
@@ -40,6 +40,7 @@
RegistrationCommand registrationCommand;
boolean restartAgent = false;
+ boolean restartEnabled = true;
boolean hasMappedComponents = false;
@JsonProperty("responseId")
@@ -92,6 +93,16 @@
this.restartAgent = restartAgent;
}
+ @JsonProperty("restartEnabled")
+ public boolean getRstartEnabled() {
+ return restartEnabled;
+ }
+
+ @JsonProperty("restartEnabled")
+ public void setRestartEnabled(boolean restartEnabled) {
+ this.restartEnabled = restartEnabled;
+ }
+
@JsonProperty("hasMappedComponents")
public boolean hasMappedComponents() {
return hasMappedComponents;
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/agent/Register.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/agent/Register.java
index 9299a16..a44c3a4 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/agent/Register.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/agent/Register.java
@@ -16,15 +16,14 @@
*/
package org.apache.slider.server.appmaster.web.rest.agent;
+import org.apache.slider.providers.agent.State;
import org.codehaus.jackson.annotate.JsonIgnoreProperties;
import org.codehaus.jackson.annotate.JsonProperty;
import org.codehaus.jackson.map.annotate.JsonSerialize;
-/**
- *
- * Data model for agent to send heartbeat to ambari and/or app master.
- *
- */
+import java.util.Map;
+
+/** Data model for agent to send heartbeat to ambari and/or app master. */
@JsonIgnoreProperties(ignoreUnknown = true)
@JsonSerialize(include = JsonSerialize.Inclusion.NON_NULL)
public class Register {
@@ -36,6 +35,9 @@
private String publicHostname;
private AgentEnv agentEnv;
private String agentVersion;
+ private State actualState;
+ private State expectedState;
+ private Map<String, String> allocatedPorts;
@JsonProperty("responseId")
public int getResponseId() {
@@ -44,13 +46,17 @@
@JsonProperty("responseId")
public void setResponseId(int responseId) {
- this.responseId=responseId;
+ this.responseId = responseId;
}
public long getTimestamp() {
return timestamp;
}
+ public void setTimestamp(long timestamp) {
+ this.timestamp = timestamp;
+ }
+
public String getHostname() {
return hostname;
}
@@ -67,10 +73,6 @@
this.hardwareProfile = hardwareProfile;
}
- public void setTimestamp(long timestamp) {
- this.timestamp = timestamp;
- }
-
public String getPublicHostname() {
return publicHostname;
}
@@ -103,15 +105,45 @@
this.currentPingPort = currentPingPort;
}
+ public State getActualState() {
+ return actualState;
+ }
+
+ public void setActualState(State actualState) {
+ this.actualState = actualState;
+ }
+
+ public State getExpectedState() {
+ return expectedState;
+ }
+
+ public void setExpectedState(State expectedState) {
+ this.expectedState = expectedState;
+ }
+
+ /** @return the allocated ports, or <code>null</code> if none are present */
+ @JsonProperty("allocatedPorts")
+ public Map<String, String> getAllocatedPorts() {
+ return allocatedPorts;
+ }
+
+ /** @param ports allocated ports */
+ @JsonProperty("allocatedPorts")
+ public void setAllocatedPorts(Map<String, String> ports) {
+ this.allocatedPorts = ports;
+ }
+
@Override
public String toString() {
String ret = "responseId=" + responseId + "\n" +
"timestamp=" + timestamp + "\n" +
- "hostname=" + hostname + "\n" +
- "currentPingPort=" + currentPingPort + "\n";
+ "hostname=" + hostname + "\n" +
+ "expectedState=" + expectedState + "\n" +
+ "actualState=" + actualState + "\n";
- if (hardwareProfile != null)
+ if (hardwareProfile != null) {
ret = ret + "hardwareprofile=" + this.hardwareProfile.toString();
+ }
return ret;
}
}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/publisher/PublisherResource.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/publisher/PublisherResource.java
index a439d9b..5d8b657 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/publisher/PublisherResource.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/publisher/PublisherResource.java
@@ -110,7 +110,7 @@
@Produces({MediaType.APPLICATION_JSON})
public Set<URL> getAMClassPath() {
URL[] urls = ((URLClassLoader) getClass().getClassLoader()).getURLs();
- return new LinkedHashSet<>(Arrays.asList(urls));
+ return new LinkedHashSet<URL>(Arrays.asList(urls));
}
@GET
@@ -231,7 +231,7 @@
propertyName, config);
throw new NotFoundException("Property not found: " + propertyName);
}
- Map<String,String> rtnVal = new HashMap<>();
+ Map<String, String> rtnVal = new HashMap<String, String>();
rtnVal.put(propertyName, propVal);
return rtnVal;
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/view/ContainerStatsBlock.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/view/ContainerStatsBlock.java
index 5645e0e..95f0417 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/view/ContainerStatsBlock.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/view/ContainerStatsBlock.java
@@ -96,7 +96,7 @@
protected void render(Block html) {
// TODO Probably better to just get a copy of this list for us to avoid the repeated synchronization?
// does this change if we have 50 node, 100node, 500 node clusters?
- final Map<String,RoleInstance> containerInstances = getContainerInstances(slider.getAppState().cloneActiveContainerList());
+ final Map<String,RoleInstance> containerInstances = getContainerInstances(slider.getAppState().cloneOwnedContainerList());
for (Entry<String,RoleStatus> entry : slider.getRoleStatusByName().entrySet()) {
final String name = entry.getKey();
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/view/IndexBlock.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/view/IndexBlock.java
index 90a3ee7..54bdb09 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/view/IndexBlock.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/view/IndexBlock.java
@@ -30,7 +30,6 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.net.URL;
import java.util.Map;
import java.util.Map.Entry;
@@ -63,7 +62,7 @@
UL<DIV<Hamlet>> ul = div.ul();
- ul.li("Total number of containers for cluster: " + appState.getNumActiveContainers());
+ ul.li("Total number of containers for cluster: " + appState.getNumOwnedContainers());
ul.li("Cluster created: " + getInfoAvoidingNulls(StatusKeys.INFO_CREATE_TIME_HUMAN));
ul.li("Cluster last flexed: " + getInfoAvoidingNulls(StatusKeys.INFO_FLEX_TIME_HUMAN));
ul.li("Cluster running since: " + getInfoAvoidingNulls(StatusKeys.INFO_LIVE_TIME_HUMAN));
diff --git a/slider-core/src/main/java/org/apache/slider/server/avro/RoleHistoryWriter.java b/slider-core/src/main/java/org/apache/slider/server/avro/RoleHistoryWriter.java
index 2120be5..422ffeb 100644
--- a/slider-core/src/main/java/org/apache/slider/server/avro/RoleHistoryWriter.java
+++ b/slider-core/src/main/java/org/apache/slider/server/avro/RoleHistoryWriter.java
@@ -90,7 +90,7 @@
throws IOException {
try {
DatumWriter<RoleHistoryRecord> writer =
- new SpecificDatumWriter<>(RoleHistoryRecord.class);
+ new SpecificDatumWriter<RoleHistoryRecord>(RoleHistoryRecord.class);
int roles = history.getRoleSize();
RoleHistoryHeader header = new RoleHistoryHeader();
@@ -184,7 +184,7 @@
BadConfigException {
try {
DatumReader<RoleHistoryRecord> reader =
- new SpecificDatumReader<>(RoleHistoryRecord.class);
+ new SpecificDatumReader<RoleHistoryRecord>(RoleHistoryRecord.class);
Decoder decoder =
DecoderFactory.get().jsonDecoder(RoleHistoryRecord.getClassSchema(),
in);
@@ -336,7 +336,7 @@
PathFilter filter = new GlobFilter(SliderKeys.HISTORY_FILENAME_GLOB_PATTERN);
FileStatus[] stats = fs.listStatus(dir, filter);
- List<Path> paths = new ArrayList<>(stats.length);
+ List<Path> paths = new ArrayList<Path>(stats.length);
for (FileStatus stat : stats) {
log.debug("Possible entry: {}", stat.toString());
if (stat.isFile() && (includeEmptyFiles || stat.getLen() > 0)) {
diff --git a/slider-core/src/main/java/org/apache/slider/server/servicemonitor/MonitorUtils.java b/slider-core/src/main/java/org/apache/slider/server/servicemonitor/MonitorUtils.java
index 3b2c6d7..a4447e3 100644
--- a/slider-core/src/main/java/org/apache/slider/server/servicemonitor/MonitorUtils.java
+++ b/slider-core/src/main/java/org/apache/slider/server/servicemonitor/MonitorUtils.java
@@ -50,7 +50,7 @@
* @return a list view with no empty strings
*/
public static List<String> prepareArgs(String[] args) {
- List<String> argsList = new ArrayList<>(args.length);
+ List<String> argsList = new ArrayList<String>(args.length);
StringBuilder argsStr = new StringBuilder("Arguments: [");
for (String arg : args) {
argsStr.append('"').append(arg).append("\" ");
diff --git a/slider-core/src/main/java/org/apache/slider/server/services/curator/RegistryBinderService.java b/slider-core/src/main/java/org/apache/slider/server/services/curator/RegistryBinderService.java
index 14b78a7..b3e2ff2 100644
--- a/slider-core/src/main/java/org/apache/slider/server/services/curator/RegistryBinderService.java
+++ b/slider-core/src/main/java/org/apache/slider/server/services/curator/RegistryBinderService.java
@@ -54,10 +54,10 @@
private final ServiceDiscovery<Payload> discovery;
private final Map<String, ServiceInstance<Payload>> entries =
- new HashMap<>();
+ new HashMap<String, ServiceInstance<Payload>>();
private JsonSerDeser<CuratorServiceInstance<Payload>> deser =
- new JsonSerDeser<>(CuratorServiceInstance.class);
+ new JsonSerDeser<CuratorServiceInstance<Payload>>(CuratorServiceInstance.class);
/**
* Create an instance
@@ -221,7 +221,7 @@
try {
List<String> instanceIDs = instanceIDs(servicetype);
List<CuratorServiceInstance<Payload>> instances =
- new ArrayList<>(instanceIDs.size());
+ new ArrayList<CuratorServiceInstance<Payload>>(instanceIDs.size());
for (String instanceID : instanceIDs) {
CuratorServiceInstance<Payload> instance =
queryForInstance(servicetype, instanceID);
diff --git a/slider-core/src/main/java/org/apache/slider/server/services/registry/RegistryRestResources.java b/slider-core/src/main/java/org/apache/slider/server/services/registry/RegistryRestResources.java
index 4938adf..e4e8523 100644
--- a/slider-core/src/main/java/org/apache/slider/server/services/registry/RegistryRestResources.java
+++ b/slider-core/src/main/java/org/apache/slider/server/services/registry/RegistryRestResources.java
@@ -100,7 +100,8 @@
try {
List<CuratorServiceInstance<ServiceInstanceData>>
instances = registry.listInstances(name);
- return Response.ok(new CuratorServiceInstances<>(instances)).build();
+ return Response.ok(
+ new CuratorServiceInstances<ServiceInstanceData>(instances)).build();
} catch (Exception e) {
log.error("Error during generation of response", e);
return Response.serverError().build();
diff --git a/slider-core/src/main/java/org/apache/slider/server/services/registry/SliderRegistryService.java b/slider-core/src/main/java/org/apache/slider/server/services/registry/SliderRegistryService.java
index 1904df5..ca4d180 100644
--- a/slider-core/src/main/java/org/apache/slider/server/services/registry/SliderRegistryService.java
+++ b/slider-core/src/main/java/org/apache/slider/server/services/registry/SliderRegistryService.java
@@ -53,7 +53,7 @@
IOException {
List<CuratorServiceInstance<ServiceInstanceData>> services =
listInstances(serviceType);
- List<ServiceInstanceData> payloads = new ArrayList<>(services.size());
+ List<ServiceInstanceData> payloads = new ArrayList<ServiceInstanceData>(services.size());
for (CuratorServiceInstance<ServiceInstanceData> instance : services) {
payloads.add(instance.payload);
}
diff --git a/slider-core/src/main/java/org/apache/slider/server/services/security/SecurityUtils.java b/slider-core/src/main/java/org/apache/slider/server/services/security/SecurityUtils.java
index 5238d90..56ee199 100644
--- a/slider-core/src/main/java/org/apache/slider/server/services/security/SecurityUtils.java
+++ b/slider-core/src/main/java/org/apache/slider/server/services/security/SecurityUtils.java
@@ -18,6 +18,10 @@
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.RandomStringUtils;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RawLocalFileSystem;
+import org.apache.hadoop.fs.permission.FsAction;
+import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.slider.common.SliderKeys;
import org.apache.slider.core.conf.MapOperations;
import org.slf4j.Logger;
@@ -25,11 +29,11 @@
import java.io.File;
import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-import java.nio.file.attribute.PosixFilePermission;
-import java.nio.file.attribute.PosixFilePermissions;
+//import java.nio.file.Files;
+//import java.nio.file.Path;
+//import java.nio.file.Paths;
+//import java.nio.file.attribute.PosixFilePermission;
+//import java.nio.file.attribute.PosixFilePermissions;
import java.util.Set;
/**
@@ -89,8 +93,11 @@
}
public static String getOpenSslCommandResult(String command, int exitCode) {
- return new StringBuilder().append("Command ").append(hideOpenSslPassword(command)).append(" was finished with exit code: ")
- .append(exitCode).append(" - ").append(getOpenSslExitCodeDescription(exitCode)).toString();
+ return new StringBuilder().append("Command ")
+ .append(hideOpenSslPassword(command))
+ .append(" was finished with exit code: ")
+ .append(exitCode).append(" - ")
+ .append(getOpenSslExitCodeDescription(exitCode)).toString();
}
private static String getOpenSslExitCodeDescription(int exitCode) {
@@ -141,15 +148,18 @@
File newCertsDir = new File(dbDir, "newcerts");
newCertsDir.mkdirs();
try {
- Set<PosixFilePermission> perms =
- PosixFilePermissions.fromString("rwx------");
- Files.setPosixFilePermissions(Paths.get(secDirFile.toURI()), perms);
- Files.setPosixFilePermissions(Paths.get(dbDir.toURI()), perms);
- Files.setPosixFilePermissions(Paths.get(newCertsDir.toURI()), perms);
+ RawLocalFileSystem fileSystem = new RawLocalFileSystem();
+ FsPermission permissions = new FsPermission(FsAction.ALL, FsAction.NONE,
+ FsAction.NONE);
+ fileSystem.setPermission(new Path(dbDir.getAbsolutePath()),
+ permissions);
+ fileSystem.setPermission(new Path(dbDir.getAbsolutePath()), permissions);
+ fileSystem.setPermission(new Path(newCertsDir.getAbsolutePath()),
+ permissions);
File indexFile = new File(dbDir, "index.txt");
indexFile.createNewFile();
- SecurityUtils.writeCaConfigFile(secDirFile.getAbsolutePath());
+ SecurityUtils.writeCaConfigFile(secDirFile.getAbsolutePath().replace('\\', '/'));
} catch (IOException e) {
LOG.error("Unable to create SSL configuration directories/files", e);
@@ -191,15 +201,19 @@
}
private static String getDefaultKeystoreLocation() {
- Path workDir = null;
+ File workDir = null;
try {
- workDir = Files.createTempDirectory("sec");
+ workDir = new File(FileUtils.getTempDirectory().getAbsolutePath()
+ + "/sec" + System.currentTimeMillis());
+ if (!workDir.mkdirs()) {
+ throw new IOException("Unable to create temporary security directory");
+ }
} catch (IOException e) {
LOG.warn("Unable to create security directory");
return null;
}
- return new StringBuilder().append(workDir.toAbsolutePath())
+ return new StringBuilder().append(workDir.getAbsolutePath())
.append(File.separator)
.append(SliderKeys.SECURITY_DIR)
.append(File.separator)
diff --git a/slider-core/src/main/java/org/apache/slider/server/services/workflow/ForkedProcessService.java b/slider-core/src/main/java/org/apache/slider/server/services/workflow/ForkedProcessService.java
index ccce6cb..46c724c 100644
--- a/slider-core/src/main/java/org/apache/slider/server/services/workflow/ForkedProcessService.java
+++ b/slider-core/src/main/java/org/apache/slider/server/services/workflow/ForkedProcessService.java
@@ -27,6 +27,7 @@
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
+import java.util.concurrent.ExecutorService;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
@@ -65,8 +66,9 @@
* it calls its {@link #stop()} method. If the error code was non-zero,
* the service is logged as having failed.
*/
-public class ForkedProcessService extends AbstractWorkflowExecutorService implements
- LongLivedProcessLifecycleEvent, Runnable {
+public class ForkedProcessService
+ extends WorkflowExecutorService<ExecutorService>
+ implements LongLivedProcessLifecycleEvent, Runnable {
/**
* Log for the forked master process
diff --git a/slider-core/src/main/java/org/apache/slider/server/services/workflow/LongLivedProcess.java b/slider-core/src/main/java/org/apache/slider/server/services/workflow/LongLivedProcess.java
index ecc26b9..c8ff758 100644
--- a/slider-core/src/main/java/org/apache/slider/server/services/workflow/LongLivedProcess.java
+++ b/slider-core/src/main/java/org/apache/slider/server/services/workflow/LongLivedProcess.java
@@ -112,7 +112,7 @@
this.processLog = processLog;
ServiceThreadFactory factory = new ServiceThreadFactory(name, true);
processExecutor = Executors.newSingleThreadExecutor(factory);
- logExecutor= Executors.newSingleThreadExecutor(factory);
+ logExecutor = Executors.newSingleThreadExecutor(factory);
processBuilder = new ProcessBuilder(commands);
processBuilder.redirectErrorStream(false);
}
@@ -288,6 +288,7 @@
*/
@Override // Runnable
public void run() {
+ Preconditions.checkNotNull(process, "null process");
LOG.debug("Lifecycle callback thread running");
//notify the callback that the process has started
if (lifecycleCallback != null) {
@@ -326,10 +327,10 @@
public void start() throws IOException {
spawnChildProcess();
- processExecutor.submit(this);
processStreamReader =
- new ProcessStreamReader(processLog, STREAM_READER_SLEEP_TIME);
+ new ProcessStreamReader(processLog, STREAM_READER_SLEEP_TIME);
logExecutor.submit(processStreamReader);
+ processExecutor.submit(this);
}
/**
diff --git a/slider-core/src/main/java/org/apache/slider/server/services/workflow/WorkflowCallbackService.java b/slider-core/src/main/java/org/apache/slider/server/services/workflow/WorkflowCallbackService.java
index 6c50798..fae3a38 100644
--- a/slider-core/src/main/java/org/apache/slider/server/services/workflow/WorkflowCallbackService.java
+++ b/slider-core/src/main/java/org/apache/slider/server/services/workflow/WorkflowCallbackService.java
@@ -36,7 +36,7 @@
* started in this service's <code>start()</code> operation.
*/
public class WorkflowCallbackService<V> extends
- AbstractWorkflowExecutorService {
+ WorkflowScheduledExecutorService<ScheduledExecutorService> {
protected static final Logger LOG =
LoggerFactory.getLogger(WorkflowCallbackService.class);
private final Callable<V> callback;
@@ -48,7 +48,6 @@
private Callable<V> callable;
private ScheduledFuture<V> scheduledFuture;
-
/**
* Create an instance of the service
* @param name service name
diff --git a/slider-core/src/main/java/org/apache/slider/server/services/workflow/AbstractWorkflowExecutorService.java b/slider-core/src/main/java/org/apache/slider/server/services/workflow/WorkflowExecutorService.java
similarity index 83%
rename from slider-core/src/main/java/org/apache/slider/server/services/workflow/AbstractWorkflowExecutorService.java
rename to slider-core/src/main/java/org/apache/slider/server/services/workflow/WorkflowExecutorService.java
index c26e3c4..7409d32 100644
--- a/slider-core/src/main/java/org/apache/slider/server/services/workflow/AbstractWorkflowExecutorService.java
+++ b/slider-core/src/main/java/org/apache/slider/server/services/workflow/WorkflowExecutorService.java
@@ -18,6 +18,7 @@
package org.apache.slider.server.services.workflow;
+import com.google.common.base.Preconditions;
import org.apache.hadoop.service.AbstractService;
import java.util.concurrent.Callable;
@@ -28,16 +29,16 @@
* A service that hosts an executor -when the service is stopped,
* {@link ExecutorService#shutdownNow()} is invoked.
*/
-public abstract class AbstractWorkflowExecutorService extends AbstractService {
+public class WorkflowExecutorService<E extends ExecutorService> extends AbstractService {
- private ExecutorService executor;
+ private E executor;
/**
* Construct an instance with the given name -but
* no executor
* @param name service name
*/
- public AbstractWorkflowExecutorService(String name) {
+ public WorkflowExecutorService(String name) {
this(name, null);
}
@@ -46,8 +47,8 @@
* @param name service name
* @param executor exectuor
*/
- protected AbstractWorkflowExecutorService(String name,
- ExecutorService executor) {
+ public WorkflowExecutorService(String name,
+ E executor) {
super(name);
this.executor = executor;
}
@@ -56,16 +57,17 @@
* Get the executor
* @return the executor
*/
- public synchronized ExecutorService getExecutor() {
+ public synchronized E getExecutor() {
return executor;
}
/**
- * Set the executor. This is protected as it
- * is intended to be restricted to subclasses
+ * Set the executor. Only valid if the current one is null
* @param executor executor
*/
- protected synchronized void setExecutor(ExecutorService executor) {
+ public synchronized void setExecutor(E executor) {
+ Preconditions.checkState(this.executor == null,
+ "Executor already set");
this.executor = executor;
}
@@ -87,14 +89,15 @@
public <V> Future<V> submit(Callable<V> callable) {
return getExecutor().submit(callable);
}
+
/**
* Stop the service: halt the executor.
* @throws Exception exception.
*/
@Override
protected void serviceStop() throws Exception {
- super.serviceStop();
stopExecutor();
+ super.serviceStop();
}
/**
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/AMViewForProviders.java b/slider-core/src/main/java/org/apache/slider/server/services/workflow/WorkflowScheduledExecutorService.java
similarity index 61%
rename from slider-core/src/main/java/org/apache/slider/server/appmaster/AMViewForProviders.java
rename to slider-core/src/main/java/org/apache/slider/server/services/workflow/WorkflowScheduledExecutorService.java
index 287035f..e9f53ed 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/AMViewForProviders.java
+++ b/slider-core/src/main/java/org/apache/slider/server/services/workflow/WorkflowScheduledExecutorService.java
@@ -16,12 +16,23 @@
* limitations under the License.
*/
-package org.apache.slider.server.appmaster;
+package org.apache.slider.server.services.workflow;
-import org.apache.slider.core.exceptions.SliderException;
+import java.util.concurrent.ScheduledExecutorService;
-/** Operations available to a provider from AppMaster */
-public interface AMViewForProviders {
- /** Provider can ask AppMaster to release a specific container */
- void refreshContainer(String containerId, boolean newHostIfPossible) throws SliderException;
+/**
+ * Scheduled executor or subclass thereof
+ * @param <E> scheduled executor service type
+ */
+public class WorkflowScheduledExecutorService<E extends ScheduledExecutorService>
+ extends WorkflowExecutorService<E> {
+
+ public WorkflowScheduledExecutorService(String name) {
+ super(name);
+ }
+
+ public WorkflowScheduledExecutorService(String name,
+ E executor) {
+ super(name, executor);
+ }
}
diff --git a/slider-core/src/main/java/org/apache/slider/server/services/workflow/package-info.java b/slider-core/src/main/java/org/apache/slider/server/services/workflow/package-info.java
index fab1b9f..36d059a 100644
--- a/slider-core/src/main/java/org/apache/slider/server/services/workflow/package-info.java
+++ b/slider-core/src/main/java/org/apache/slider/server/services/workflow/package-info.java
@@ -154,7 +154,7 @@
then signals its owning service to stop once that runnable is completed.
Any exception raised in the run is stored.
</li>
- <li>{@link org.apache.slider.server.services.workflow.AbstractWorkflowExecutorService}:
+ <li>{@link org.apache.slider.server.services.workflow.WorkflowExecutorService}:
A base class for services that wish to have a {@link java.util.concurrent.ExecutorService}
with a lifespan mapped to that of a service. When the service is stopped, the
{@link java.util.concurrent.ExecutorService#shutdownNow()} method is called to
diff --git a/slider-core/src/test/groovy/org/apache/slider/agent/AgentMiniClusterTestBase.groovy b/slider-core/src/test/groovy/org/apache/slider/agent/AgentMiniClusterTestBase.groovy
index 8a4e5d8..74f7a3f 100644
--- a/slider-core/src/test/groovy/org/apache/slider/agent/AgentMiniClusterTestBase.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/agent/AgentMiniClusterTestBase.groovy
@@ -133,13 +133,12 @@
* @param blockUntilRunning block until the AM is running
* @return launcher which will have executed the command.
*/
- public ServiceLauncher<SliderClient> createMasterlessAM(
+ public ServiceLauncher<SliderClient> createStandaloneAM(
String clustername,
- int size,
boolean deleteExistingData,
boolean blockUntilRunning) {
List<String> args = [];
- return createMasterlessAMWithArgs(
+ return createStandaloneAMWithArgs(
clustername,
args,
deleteExistingData,
@@ -155,7 +154,7 @@
* @param blockUntilRunning block until the AM is running
* @return launcher which will have executed the command.
*/
- public ServiceLauncher<SliderClient> createMasterlessAMWithArgs(
+ public ServiceLauncher<SliderClient> createStandaloneAMWithArgs(
String clustername,
List<String> extraArgs,
boolean deleteExistingData,
diff --git a/slider-core/src/test/groovy/org/apache/slider/agent/actions/TestActionExists.groovy b/slider-core/src/test/groovy/org/apache/slider/agent/actions/TestActionExists.groovy
index cb05fd0..9bfeb8c 100644
--- a/slider-core/src/test/groovy/org/apache/slider/agent/actions/TestActionExists.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/agent/actions/TestActionExists.groovy
@@ -29,8 +29,6 @@
import org.apache.slider.core.exceptions.UnknownApplicationInstanceException
import org.apache.slider.core.main.LauncherExitCodes
import org.apache.slider.core.main.ServiceLauncher
-import org.apache.slider.test.SliderTestUtils
-import org.junit.Assert
import org.junit.Before
import org.junit.Test
@@ -45,7 +43,7 @@
@Before
public void setup() {
super.setup()
- createMiniCluster("TestActionExists", configuration, 1, false)
+ createMiniCluster("", configuration, 1, false)
}
@Test
@@ -72,7 +70,10 @@
public void testExistsLiveCluster() throws Throwable {
//launch the cluster
String clustername = createClusterName()
- ServiceLauncher<SliderClient> launcher = createMasterlessAM(clustername, 0, true, false)
+ ServiceLauncher<SliderClient> launcher = createStandaloneAM(
+ clustername,
+ true,
+ false)
SliderClient sliderClient = launcher.service
addToTeardown(launcher)
ApplicationReport report = waitForClusterLive(sliderClient)
diff --git a/slider-core/src/test/groovy/org/apache/slider/agent/actions/TestActionList.groovy b/slider-core/src/test/groovy/org/apache/slider/agent/actions/TestActionList.groovy
index 66fdc15..b7196ac 100644
--- a/slider-core/src/test/groovy/org/apache/slider/agent/actions/TestActionList.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/agent/actions/TestActionList.groovy
@@ -40,7 +40,7 @@
@Before
public void setup() {
super.setup()
- createMiniCluster("test_action_list", configuration, 1, false)
+ createMiniCluster("", configuration, 1, false)
}
/**
@@ -88,7 +88,10 @@
public void testListLiveCluster() throws Throwable {
//launch the cluster
String clustername = createClusterName()
- ServiceLauncher<SliderClient> launcher = createMasterlessAM(clustername, 0, true, false)
+ ServiceLauncher<SliderClient> launcher = createStandaloneAM(
+ clustername,
+ true,
+ false)
addToTeardown(launcher)
//do the low level operations to get a better view of what is going on
SliderClient sliderClient = launcher.service
diff --git a/slider-core/src/test/groovy/org/apache/slider/agent/actions/TestActionStatus.groovy b/slider-core/src/test/groovy/org/apache/slider/agent/actions/TestActionStatus.groovy
index bdee39f..bae8cea 100644
--- a/slider-core/src/test/groovy/org/apache/slider/agent/actions/TestActionStatus.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/agent/actions/TestActionStatus.groovy
@@ -30,10 +30,8 @@
import org.apache.slider.common.params.Arguments
import org.apache.slider.client.SliderClient
import org.apache.slider.common.params.ActionStatusArgs
-import org.apache.slider.common.params.ClientArgs
import org.apache.hadoop.yarn.api.records.ApplicationReport
import org.apache.hadoop.yarn.conf.YarnConfiguration
-import org.apache.slider.core.main.LauncherExitCodes
import org.apache.slider.core.main.ServiceLauncher
import org.junit.Before
import org.junit.Test
@@ -48,7 +46,7 @@
@Before
public void setup() {
super.setup()
- createMiniCluster("test_action_status", configuration, 1, false)
+ createMiniCluster("", configuration, 1, false)
}
/**
@@ -71,7 +69,7 @@
new YarnConfiguration(miniCluster.config),
[
SliderActions.ACTION_STATUS,
- "test_status_missing_cluster",
+ "teststatusmissingcluster",
Arguments.ARG_MANAGER, RMAddr
]
)
@@ -84,10 +82,13 @@
public void testStatusLiveCluster() throws Throwable {
describe("create a live cluster then exec the status command")
- String clustername = "test_status_live_cluster"
+ String clustername = "teststatuslivecluster"
//launch the cluster
- ServiceLauncher<SliderClient> launcher = createMasterlessAM(clustername, 0, true, false)
+ ServiceLauncher<SliderClient> launcher = createStandaloneAM(
+ clustername,
+ true,
+ false)
SliderClient sliderClient = launcher.service
ApplicationReport report = waitForClusterLive(sliderClient)
diff --git a/slider-core/src/test/groovy/org/apache/slider/agent/freezethaw/TestFreezeCommands.groovy b/slider-core/src/test/groovy/org/apache/slider/agent/freezethaw/TestFreezeCommands.groovy
index 9d41b8c..f5eff25 100644
--- a/slider-core/src/test/groovy/org/apache/slider/agent/freezethaw/TestFreezeCommands.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/agent/freezethaw/TestFreezeCommands.groovy
@@ -42,15 +42,14 @@
@Test
public void testFreezeCommands() throws Throwable {
- String clustername = "test_freeze_commands"
YarnConfiguration conf = configuration
- createMiniCluster(clustername, conf, 1, 1, 1, true, false)
+ String clustername = createMiniCluster("", conf, 1, 1, 1, true, false)
describe "create a masterless AM, freeze it, try to freeze again"
- ServiceLauncher<SliderClient> launcher = createMasterlessAM(
- clustername,
- 0,
+ ServiceLauncher<SliderClient> launcher = createStandaloneAM(
+ clustername
+ ,
true,
true);
addToTeardown(launcher.service);
diff --git a/slider-core/src/test/groovy/org/apache/slider/agent/freezethaw/TestFreezeThawMasterlessAM.groovy b/slider-core/src/test/groovy/org/apache/slider/agent/freezethaw/TestFreezeThawMasterlessAM.groovy
index e833423..04be7c0 100644
--- a/slider-core/src/test/groovy/org/apache/slider/agent/freezethaw/TestFreezeThawMasterlessAM.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/agent/freezethaw/TestFreezeThawMasterlessAM.groovy
@@ -22,6 +22,7 @@
import groovy.util.logging.Slf4j
import org.apache.hadoop.fs.FileSystem as HadoopFS
import org.apache.hadoop.fs.Path
+import org.apache.hadoop.yarn.api.records.ApplicationReport
import org.apache.hadoop.yarn.conf.YarnConfiguration
import org.apache.slider.agent.AgentMiniClusterTestBase
import org.apache.slider.client.SliderClient
@@ -48,9 +49,8 @@
@Test
public void testFreezeThawMasterlessAM() throws Throwable {
- String clustername = "test_freeze_thaw_masterless_am"
YarnConfiguration conf = configuration
- createMiniCluster(clustername, conf, 1, 1, 1, true, false)
+ String clustername = createMiniCluster("", conf, 1, 1, 1, true, false)
describe "create a masterless AM, freeze it, thaw it"
//copy the confdir somewhere
@@ -59,7 +59,10 @@
SliderUtils.copyDirectory(conf, resConfPath, tempConfPath, null)
- ServiceLauncher<SliderClient> launcher = createMasterlessAM(clustername, 0, true, true)
+ ServiceLauncher<SliderClient> launcher = createStandaloneAM(
+ clustername,
+ true,
+ true)
SliderClient sliderClient = launcher.service
addToTeardown(sliderClient);
@@ -74,8 +77,10 @@
//now start the cluster
ServiceLauncher launcher2 = thawCluster(clustername, [], true);
SliderClient newCluster = launcher2.service
+ addToTeardown(newCluster);
+
+// ApplicationReport report = waitForClusterLive(newCluster)
newCluster.getClusterDescription(clustername);
-
//freeze
assert 0 == clusterActionFreeze(sliderClient, clustername)
diff --git a/slider-core/src/test/groovy/org/apache/slider/agent/freezethaw/TestFreezeUnknownCluster.groovy b/slider-core/src/test/groovy/org/apache/slider/agent/freezethaw/TestFreezeUnknownCluster.groovy
index ebec43b..b81bc77 100644
--- a/slider-core/src/test/groovy/org/apache/slider/agent/freezethaw/TestFreezeUnknownCluster.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/agent/freezethaw/TestFreezeUnknownCluster.groovy
@@ -39,9 +39,8 @@
@Test
public void testFreezeUnknownCluster() throws Throwable {
- String clustername = "test_start_unknown_cluster"
YarnConfiguration conf = configuration
- createMiniCluster(clustername, conf, 1, true)
+ String clustername = createMiniCluster("", conf, 1, true)
describe "try to freeze a cluster that isn't defined"
diff --git a/slider-core/src/test/groovy/org/apache/slider/agent/freezethaw/TestThawUnknownCluster.groovy b/slider-core/src/test/groovy/org/apache/slider/agent/freezethaw/TestThawUnknownCluster.groovy
index 4c90445..b49032d 100644
--- a/slider-core/src/test/groovy/org/apache/slider/agent/freezethaw/TestThawUnknownCluster.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/agent/freezethaw/TestThawUnknownCluster.groovy
@@ -37,8 +37,7 @@
@Test
public void testThawUnknownCluster() throws Throwable {
- String clustername = "test_thaw_unknown_cluster"
- createMiniCluster(clustername, configuration, 1, true)
+ String clustername = createMiniCluster("", configuration, 1, true)
describe "try to start a cluster that isn't defined"
diff --git a/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestBadAMHeap.groovy b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestBadAMHeap.groovy
index 160420b..b253c17 100644
--- a/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestBadAMHeap.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestBadAMHeap.groovy
@@ -39,14 +39,13 @@
@Test
public void testBadAMHeap() throws Throwable {
- String clustername = "test_bad_am_heap"
- createMiniCluster(clustername, configuration, 1, true)
+ String clustername = createMiniCluster("", configuration, 1, true)
describe "verify that bad Java heap options are picked up"
try {
ServiceLauncher<SliderClient> launcher =
- createMasterlessAMWithArgs(clustername,
+ createStandaloneAMWithArgs(clustername,
[
Arguments.ARG_COMP_OPT,
SliderKeys.COMPONENT_AM,
diff --git a/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestBadYarnQueue.groovy b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestBadYarnQueue.groovy
index 5509314..13d5398 100644
--- a/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestBadYarnQueue.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestBadYarnQueue.groovy
@@ -44,14 +44,13 @@
@Test
public void testBadYarnQueue() throws Throwable {
skip("untestable in minicluster")
- String clustername = "test_bad_yarn_queue"
- createMiniCluster(clustername, configuration, 1, true)
+ String clustername = createMiniCluster("", configuration, 1, true)
describe "verify that a bad yarn queue fails the launch"
try {
ServiceLauncher<SliderClient> launcher =
- createMasterlessAMWithArgs(clustername,
+ createStandaloneAMWithArgs(clustername,
[
Arguments.ARG_DEFINE,
SliderXmlConfKeys.KEY_YARN_QUEUE + "=noqueue",
diff --git a/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestBuildStandaloneAM.groovy b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestBuildStandaloneAM.groovy
index 2933c90..0665a9a 100644
--- a/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestBuildStandaloneAM.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestBuildStandaloneAM.groovy
@@ -22,6 +22,8 @@
import groovy.util.logging.Slf4j
import org.apache.hadoop.yarn.api.records.ApplicationReport
import org.apache.slider.agent.AgentMiniClusterTestBase
+import org.apache.slider.api.ClusterDescription
+import org.apache.slider.api.ResourceKeys
import org.apache.slider.client.SliderClient
import org.apache.slider.common.SliderExitCodes
import org.apache.slider.common.params.SliderActions
@@ -30,6 +32,11 @@
import org.apache.slider.core.main.ServiceLauncher
import org.junit.Test
+import static org.apache.slider.common.params.Arguments.ARG_COMP_OPT
+import static org.apache.slider.common.params.Arguments.ARG_RESOURCE_OPT
+import static org.apache.slider.common.params.Arguments.ARG_RES_COMP_OPT
+import static org.apache.slider.providers.agent.AgentKeys.SERVICE_NAME
+
@CompileStatic
@Slf4j
@@ -37,8 +44,7 @@
@Test
public void testBuildCluster() throws Throwable {
- String clustername = "test_build_cluster"
- createMiniCluster(clustername, configuration, 1, true)
+ String clustername = createMiniCluster("", configuration, 1, true)
describe "verify that a build cluster is created but not started"
@@ -46,6 +52,67 @@
SliderActions.ACTION_BUILD,
clustername,
[:],
+ [ARG_RESOURCE_OPT, "yarn.container.failure.window.years", "4"],
+ true,
+ false,
+ agentDefOptions)
+ SliderClient sliderClient = launcher.service
+ addToTeardown(sliderClient);
+
+ //verify that exists(live) is now false
+ assert LauncherExitCodes.EXIT_FALSE ==
+ sliderClient.actionExists(clustername, true)
+
+ //but the cluster is still there for the default
+ assert 0 == sliderClient.actionExists(clustername, false)
+
+
+
+ // verify the YARN registry doesn't know of it
+ def serviceRegistryClient = sliderClient.YARNRegistryClient
+ ApplicationReport report = serviceRegistryClient.findInstance(clustername)
+ assert report == null;
+
+ // verify that global resource options propagate from the CLI
+ def aggregateConf = sliderClient.loadPersistedClusterDescription(clustername)
+ def windowDays = aggregateConf.resourceOperations.globalOptions.getMandatoryOptionInt(
+ "yarn.container.failure.window.years")
+ assert 4 == windowDays
+
+ //and a second attempt will fail as the cluster now exists
+ try {
+ ServiceLauncher<SliderClient> cluster2 = createOrBuildCluster(
+ SliderActions.ACTION_BUILD,
+ clustername,
+ [:],
+ [],
+ false,
+ false,
+ agentDefOptions)
+ fail("expected an exception, got $cluster2.service")
+ } catch (SliderException e) {
+ assertExceptionDetails(e, SliderExitCodes.EXIT_INSTANCE_EXISTS, "")
+ }
+
+
+
+ //thaw time
+ ServiceLauncher<SliderClient> l2 = thawCluster(clustername, [], true)
+ SliderClient thawed = l2.service
+ addToTeardown(thawed);
+ waitForClusterLive(thawed)
+ }
+
+ @Test
+ public void testUpdateCluster() throws Throwable {
+ String clustername = createMiniCluster("", configuration, 1, true)
+
+ describe "verify that a built cluster can be updated"
+
+ ServiceLauncher<SliderClient> launcher = createOrBuildCluster(
+ SliderActions.ACTION_BUILD,
+ clustername,
+ [:],
[],
true,
false,
@@ -64,26 +131,29 @@
ApplicationReport report = serviceRegistryClient.findInstance(clustername)
assert report == null;
- //and a second attempt will fail as the cluster now exists
- try {
- ServiceLauncher<SliderClient> cluster2 = createOrBuildCluster(
- SliderActions.ACTION_BUILD,
- clustername,
- [:],
- [],
- false,
- false,
- agentDefOptions)
- fail("expected an exception, got $cluster2.service")
- } catch (SliderException e) {
- assertExceptionDetails(e, SliderExitCodes.EXIT_INSTANCE_EXISTS, "")
- }
+ def master = "hbase-master"
+ createOrBuildCluster(
+ SliderActions.ACTION_UPDATE,
+ clustername,
+ [(master): 1],
+ [
+ ARG_RES_COMP_OPT, master, ResourceKeys.COMPONENT_PRIORITY, "2",
+ ARG_COMP_OPT, master, SERVICE_NAME, "HBASE",
+ ],
+ true,
+ false,
+ agentDefOptions)
- //thaw time
- ServiceLauncher<SliderClient> l2 = thawCluster(clustername, [], true)
- SliderClient thawed = l2.service
- addToTeardown(thawed);
- waitForClusterLive(thawed)
+ launcher = thawCluster(clustername, [], true);
+ sliderClient = launcher.service
+ addToTeardown(sliderClient);
+ waitForClusterLive(sliderClient)
+
+ dumpClusterStatus(sliderClient, "application status after update")
+
+ ClusterDescription cd = sliderClient.clusterDescription
+ Map<String, String> masterRole = cd.getRole(master)
+ assert masterRole != null, "Role hbase-master must exist"
+ assert cd.roleNames.contains(master), "Role names must contain hbase-master"
}
-
}
diff --git a/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestDestroyMasterlessAM.groovy b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAMDestroy.groovy
similarity index 90%
rename from slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestDestroyMasterlessAM.groovy
rename to slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAMDestroy.groovy
index 37fa0ec..17e4ff2 100644
--- a/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestDestroyMasterlessAM.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAMDestroy.groovy
@@ -39,14 +39,13 @@
@CompileStatic
@Slf4j
-class TestDestroyMasterlessAM extends AgentMiniClusterTestBase {
+class TestStandaloneAMDestroy extends AgentMiniClusterTestBase {
@Test
- public void testDestroyMasterlessAM() throws Throwable {
- String clustername = "test_destroy_masterless_am"
- createMiniCluster(clustername, configuration, 1, false)
+ public void testDestroyStandaloneAM() throws Throwable {
+ String clustername = createMiniCluster("", configuration, 1, false)
- describe "create a masterless AM, stop it, try to create" +
+ describe "create a Standalone AM, stop it, try to create" +
"a second cluster with the same name, destroy it, try a third time"
ServiceLauncher<SliderClient> launcher1 = launchClientAgainstMiniMR(
@@ -58,9 +57,10 @@
])
assert launcher1.serviceExitCode == 0
-
-
- ServiceLauncher<SliderClient> launcher = createMasterlessAM(clustername, 0, true, true)
+ ServiceLauncher<SliderClient> launcher = createStandaloneAM(
+ clustername,
+ true,
+ true)
SliderClient sliderClient = launcher.service
addToTeardown(sliderClient);
@@ -82,7 +82,7 @@
//now try to create instance #2, and expect an in-use failure
try {
- createMasterlessAM(clustername, 0, false, false)
+ createStandaloneAM(clustername, false, false)
fail("expected a failure, got an AM")
} catch (SliderException e) {
assertExceptionDetails(e,
@@ -122,7 +122,7 @@
describe "recreating $clustername"
//and create a new cluster
- launcher = createMasterlessAM(clustername, 0, false, true)
+ launcher = createStandaloneAM(clustername, false, true)
SliderClient cluster2 = launcher.service
// do an echo here of a large string
diff --git a/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestKillMasterlessAM.groovy b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAMKill.groovy
similarity index 86%
rename from slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestKillMasterlessAM.groovy
rename to slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAMKill.groovy
index b4331af..75f9a2c 100644
--- a/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestKillMasterlessAM.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAMKill.groovy
@@ -36,17 +36,16 @@
@CompileStatic
@Slf4j
-class TestKillMasterlessAM extends AgentMiniClusterTestBase {
+class TestStandaloneAMKill extends AgentMiniClusterTestBase {
@Test
- public void testKillMasterlessAM() throws Throwable {
- String clustername = "test_kill_masterless_am"
- createMiniCluster(clustername, configuration, 1, true)
+ public void testKillStandaloneAM() throws Throwable {
+ String clustername = createMiniCluster("", configuration, 1, true)
- describe "kill a masterless AM and verify that it shuts down"
+ describe "kill a Standalone AM and verify that it shuts down"
ServiceLauncher<SliderClient> launcher =
- createMasterlessAMWithArgs(clustername,
+ createStandaloneAMWithArgs(clustername,
[
Arguments.ARG_OPTION, SliderXmlConfKeys.KEY_AM_RESTART_LIMIT, "1"
],
diff --git a/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAMMonkeyRestart.groovy b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAMMonkeyRestart.groovy
new file mode 100644
index 0000000..162bab0
--- /dev/null
+++ b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAMMonkeyRestart.groovy
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.agent.standalone
+
+import groovy.transform.CompileStatic
+import groovy.util.logging.Slf4j
+import org.apache.hadoop.SleepJob
+import org.apache.hadoop.yarn.api.records.ApplicationReport
+import org.apache.hadoop.yarn.api.records.FinalApplicationStatus
+import org.apache.hadoop.yarn.api.records.YarnApplicationState
+import org.apache.hadoop.yarn.conf.YarnConfiguration
+import org.apache.slider.agent.AgentMiniClusterTestBase
+import org.apache.slider.api.InternalKeys
+import org.apache.slider.api.ResourceKeys
+import org.apache.slider.client.SliderClient
+import org.apache.slider.common.SliderXmlConfKeys
+import org.apache.slider.common.params.ActionAMSuicideArgs
+import org.apache.slider.common.params.Arguments
+import org.apache.slider.core.exceptions.ErrorStrings
+import org.apache.slider.core.main.ServiceLauncher
+import org.junit.Test
+
+/**
+ * Run a standalone AM with the chaos monkey enabled and verify that
+ * it fails once the configured AM restart attempt threshold is reached
+ */
+@CompileStatic
+@Slf4j
+
+class TestStandaloneAMMonkeyRestart extends AgentMiniClusterTestBase {
+
+
+ @Test
+ public void testStandaloneAMMonkeyRestart() throws Throwable {
+ describe "Run a Standalone AM with the Chaos monkey set to kill it"
+ // patch the configuration for AM restart
+ int threshold = 2;
+ YarnConfiguration conf = getRestartableConfiguration(threshold)
+
+ String clustername = createMiniCluster("", conf, 1, true)
+ ServiceLauncher<SliderClient> launcher =
+ createStandaloneAMWithArgs(clustername,
+ [
+ Arguments.ARG_OPTION, InternalKeys.CHAOS_MONKEY_ENABLED, "true",
+ Arguments.ARG_OPTION, InternalKeys.CHAOS_MONKEY_INTERVAL_SECONDS, "8",
+ Arguments.ARG_OPTION, InternalKeys.CHAOS_MONKEY_PROBABILITY_AM_FAILURE, "75000",
+ ],
+ true,
+ false)
+ SliderClient sliderClient = launcher.service
+ addToTeardown(sliderClient);
+
+ ApplicationReport report
+ report = waitForClusterLive(sliderClient, 30000)
+ describe "Waiting for the cluster to fail"
+ sleep(40000)
+ // end of process
+ report = sliderClient.applicationReport
+ log.info(report.diagnostics)
+ assert report.currentApplicationAttemptId.attemptId == threshold
+ assert YarnApplicationState.FAILED == report.yarnApplicationState
+ assert FinalApplicationStatus.FAILED == report.finalApplicationStatus
+ }
+
+ /**
+ * Get a restartable configuration
+ * @param restarts maximum number of AM launch attempts to permit
+ * @return a new YarnConfiguration with the AM restart limits set
+ */
+ public YarnConfiguration getRestartableConfiguration(int restarts) {
+ def conf = new YarnConfiguration(configuration)
+ conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, restarts)
+ conf.setInt(SliderXmlConfKeys.KEY_AM_RESTART_LIMIT, restarts)
+ conf
+ }
+
+
+}
diff --git a/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAMRestart.groovy b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAMRestart.groovy
new file mode 100644
index 0000000..8d9318a
--- /dev/null
+++ b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAMRestart.groovy
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.agent.standalone
+
+import groovy.transform.CompileStatic
+import groovy.util.logging.Slf4j
+import org.apache.hadoop.yarn.api.records.ApplicationReport
+import org.apache.hadoop.yarn.conf.YarnConfiguration
+import org.apache.slider.agent.AgentMiniClusterTestBase
+import org.apache.slider.client.SliderClient
+import org.apache.slider.common.SliderXmlConfKeys
+import org.apache.slider.common.params.ActionAMSuicideArgs
+import org.apache.slider.common.params.Arguments
+import org.apache.slider.core.main.ServiceLauncher
+import org.junit.Test
+
+/**
+ * Kill a standalone AM and verify that YARN restarts it, staying
+ * within the configured AM restart limit
+ */
+@CompileStatic
+@Slf4j
+
+class TestStandaloneAMRestart extends AgentMiniClusterTestBase {
+
+
+ @Test
+ public void testRestartStandaloneAM() throws Throwable {
+ describe "kill a Standalone AM and verify that it restarts"
+ // patch the configuration for AM restart
+ YarnConfiguration conf = getRestartableConfiguration(5)
+
+ String clustername = createMiniCluster("", conf, 1, true)
+ ServiceLauncher<SliderClient> launcher =
+ createStandaloneAMWithArgs(clustername,
+ [
+ Arguments.ARG_OPTION, SliderXmlConfKeys.KEY_AM_RESTART_LIMIT, "4"
+ ],
+ true,
+ false)
+ SliderClient sliderClient = launcher.service
+ addToTeardown(sliderClient);
+
+ ApplicationReport report = waitForClusterLive(sliderClient)
+ logReport(report)
+ waitUntilClusterLive(sliderClient, 30000)
+
+
+ ActionAMSuicideArgs args = new ActionAMSuicideArgs()
+ args.message = "test AM iteration"
+ args.waittime = 100
+ args.exitcode = 1
+ sliderClient.actionAmSuicide(clustername, args)
+ waitWhileClusterLive(sliderClient);
+ //give yarn some time to notice
+ sleep(20000)
+ waitUntilClusterLive(sliderClient, 40000)
+
+
+ // app should be running here
+ assert 0 == sliderClient.actionExists(clustername, true)
+
+
+ clusterActionFreeze(sliderClient, clustername)
+ }
+
+ /**
+ * Get a restartable configuration
+ * @param restarts maximum number of AM launch attempts to permit
+ * @return a new YarnConfiguration with the AM restart limits set
+ */
+ public YarnConfiguration getRestartableConfiguration(int restarts) {
+ def conf = new YarnConfiguration(configuration)
+ conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, restarts)
+ conf.setInt(SliderXmlConfKeys.KEY_AM_RESTART_LIMIT, restarts)
+ conf
+ }
+
+
+}
diff --git a/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAgentAM.groovy b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAgentAM.groovy
index bce24e6..73e9b07 100644
--- a/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAgentAM.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAgentAM.groovy
@@ -37,17 +37,16 @@
@CompileStatic
@Slf4j
class TestStandaloneAgentAM extends AgentMiniClusterTestBase {
+
@Test
public void testStandaloneAgentAM() throws Throwable {
-
describe "create a masterless AM then get the service and look it up via the AM"
//launch fake master
- String clustername = "test_standalone_agent_am"
- createMiniCluster(clustername, configuration, 1, true)
+ String clustername = createMiniCluster("", configuration, 1, true)
ServiceLauncher<SliderClient> launcher =
- createMasterlessAM(clustername, 0, true, false)
+ createStandaloneAM(clustername, true, false)
SliderClient client = launcher.service
addToTeardown(client);
@@ -124,7 +123,7 @@
assert oldInstance.yarnApplicationState >= YarnApplicationState.FINISHED
//create another AM
- launcher = createMasterlessAM(clustername, 0, true, true)
+ launcher = createStandaloneAM(clustername, true, true)
client = launcher.service
ApplicationId i2AppID = client.applicationId
@@ -141,7 +140,7 @@
describe("attempting to create instance #3")
//now try to create instance #3, and expect an in-use failure
try {
- createMasterlessAM(clustername, 0, false, true)
+ createStandaloneAM(clustername, false, true)
fail("expected a failure, got a masterless AM")
} catch (SliderException e) {
assertFailureClusterInUse(e);
diff --git a/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneBadClusterName.groovy b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneBadClusterName.groovy
index b2cb670..cf97520 100644
--- a/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneBadClusterName.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneBadClusterName.groovy
@@ -38,7 +38,7 @@
describe "verify that bad cluster names are picked up"
try {
- addToTeardown(createMasterlessAM(clustername, 0, true, false).service);
+ addToTeardown(createStandaloneAM(clustername, true, false).service);
fail("expected a failure")
} catch (ServiceLaunchException e) {
assertExceptionDetails(e, LauncherExitCodes.EXIT_COMMAND_ARGUMENT_ERROR)
diff --git a/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneRegistryAM.groovy b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneRegistryAM.groovy
index cb55624..b8a590e 100644
--- a/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneRegistryAM.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneRegistryAM.groovy
@@ -62,10 +62,9 @@
describe "create a masterless AM then perform registry operations on it"
//launch fake master
- String clustername = "test_standalone_registry_am"
- createMiniCluster(clustername, configuration, 1, true)
+ String clustername = createMiniCluster(configuration, 1, true)
ServiceLauncher<SliderClient> launcher
- launcher = createMasterlessAM(clustername, 0, true, false)
+ launcher = createStandaloneAM(clustername, true, false)
SliderClient client = launcher.service
addToTeardown(client);
diff --git a/slider-core/src/test/groovy/org/apache/slider/client/TestCommonArgParsing.groovy b/slider-core/src/test/groovy/org/apache/slider/client/TestCommonArgParsing.groovy
index 12e4a43..d94cd54 100644
--- a/slider-core/src/test/groovy/org/apache/slider/client/TestCommonArgParsing.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/client/TestCommonArgParsing.groovy
@@ -391,6 +391,19 @@
}
@Test
+ public void testArgUpdate() throws Throwable {
+ def ca = createClientArgs([
+ ACTION_UPDATE, 'cluster1',
+ ARG_TEMPLATE, "appConfig.json",
+ ])
+ assert ca.action == ACTION_UPDATE
+ assert ca.coreAction instanceof ActionUpdateArgs
+ assert ca.actionUpdateArgs instanceof ActionUpdateArgs
+ AbstractClusterBuildingActionArgs args = ca.actionUpdateArgs
+ assert args.template != null
+ }
+
+ @Test
public void testFlexArgs() throws Throwable {
def ca = createClientArgs([
ACTION_FLEX, 'cluster1',
diff --git a/slider-core/src/test/groovy/org/apache/slider/providers/agent/AgentTestBase.groovy b/slider-core/src/test/groovy/org/apache/slider/providers/agent/AgentTestBase.groovy
index 9b4c377..6dee64f 100644
--- a/slider-core/src/test/groovy/org/apache/slider/providers/agent/AgentTestBase.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/providers/agent/AgentTestBase.groovy
@@ -140,6 +140,36 @@
clusterOps)
}
+  /**
+   * Update an agent cluster by issuing an ACTION_UPDATE build request
+   * @param clustername name of the existing cluster to update
+   * @param roles map of role name to desired instance count
+   * @param extraArgs additional command-line arguments for the update
+   * @param deleteExistingData whether to delete existing cluster data first
+   * @return the cluster launcher
+   */
+ public ServiceLauncher<SliderClient> updateAgentCluster(
+ String clustername,
+ Map<String, Integer> roles,
+ List<String> extraArgs,
+ boolean deleteExistingData) {
+
+ YarnConfiguration conf = testConfiguration
+
+ def clusterOps = [
+ :
+ ]
+
+ return createOrBuildCluster(
+ SliderActions.ACTION_UPDATE,
+ clustername,
+ roles,
+ extraArgs,
+ deleteExistingData,
+ false,
+ clusterOps)
+ }
+
public String getApplicationHome() {
return "/"
}
diff --git a/slider-core/src/test/groovy/org/apache/slider/providers/agent/TestAgentAMManagementWS.groovy b/slider-core/src/test/groovy/org/apache/slider/providers/agent/TestAgentAMManagementWS.groovy
index 7d68458..e248ec3 100644
--- a/slider-core/src/test/groovy/org/apache/slider/providers/agent/TestAgentAMManagementWS.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/providers/agent/TestAgentAMManagementWS.groovy
@@ -27,22 +27,24 @@
import org.apache.slider.common.SliderKeys
import org.apache.slider.core.conf.MapOperations
import org.apache.slider.core.main.ServiceLauncher
-import org.apache.slider.server.appmaster.web.SliderAMWebApp
import org.apache.slider.server.appmaster.web.rest.agent.RegistrationResponse
import org.apache.slider.server.appmaster.web.rest.agent.RegistrationStatus
import org.apache.slider.server.services.security.CertificateManager
import org.apache.slider.server.services.security.SecurityUtils
+import org.junit.Before
import org.junit.Test
import org.slf4j.Logger
import org.slf4j.LoggerFactory
+import javax.net.ssl.HostnameVerifier
+import javax.net.ssl.HttpsURLConnection
+import javax.net.ssl.SSLSession
import javax.ws.rs.core.MediaType
import static org.apache.slider.common.params.Arguments.ARG_OPTION
import static org.apache.slider.providers.agent.AgentKeys.*
import static org.apache.slider.providers.agent.AgentTestUtils.createDummyJSONRegister
import static org.apache.slider.providers.agent.AgentTestUtils.createTestClient
-import static org.apache.slider.test.SliderTestUtils.log
@CompileStatic
@Slf4j
@@ -52,10 +54,10 @@
final static Logger logger = LoggerFactory.getLogger(TestAgentAMManagementWS.class)
static {
//for localhost testing only
- javax.net.ssl.HttpsURLConnection.setDefaultHostnameVerifier(
- new javax.net.ssl.HostnameVerifier(){
+ HttpsURLConnection.setDefaultHostnameVerifier(
+ new HostnameVerifier(){
public boolean verify(String hostname,
- javax.net.ssl.SSLSession sslSession) {
+ SSLSession sslSession) {
logger.info("verifying hostname ${hostname}")
InetAddress[] addresses =
InetAddress.getAllByName(hostname);
@@ -73,6 +75,12 @@
}
});
+ }
+
+ @Override
+ @Before
+ void setup() {
+ super.setup()
MapOperations compOperations = new MapOperations();
compOperations.put(SliderKeys.KEYSTORE_LOCATION, "/tmp/work/security/keystore.p12");
SecurityUtils.initializeSecurityParameters(compOperations);
@@ -83,13 +91,12 @@
System.setProperty("javax.net.ssl.trustStore", keystoreFile);
System.setProperty("javax.net.ssl.trustStorePassword", password);
System.setProperty("javax.net.ssl.trustStoreType", "PKCS12");
+
}
- @Test
+ @Test
public void testAgentAMManagementWS() throws Throwable {
- def clustername = "test_agentammanagementws"
- createMiniCluster(
- clustername,
+ String clustername = createMiniCluster("",
configuration,
1,
1,
diff --git a/slider-core/src/test/groovy/org/apache/slider/providers/agent/TestAgentEcho.groovy b/slider-core/src/test/groovy/org/apache/slider/providers/agent/TestAgentEcho.groovy
index 2f03b09..a29c8cb 100644
--- a/slider-core/src/test/groovy/org/apache/slider/providers/agent/TestAgentEcho.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/providers/agent/TestAgentEcho.groovy
@@ -43,9 +43,7 @@
@Test
public void testEchoOperation() throws Throwable {
- def clustername = "test_agent_echo"
- createMiniCluster(
- clustername,
+ String clustername = createMiniCluster("",
configuration,
1,
1,
@@ -68,7 +66,7 @@
def role = "echo"
Map<String, Integer> roles = [
- (role): 1,
+ (role): 2,
];
ServiceLauncher<SliderClient> launcher = buildAgentCluster(clustername,
roles,
diff --git a/slider-core/src/test/groovy/org/apache/slider/providers/agent/TestBuildBasicAgent.groovy b/slider-core/src/test/groovy/org/apache/slider/providers/agent/TestBuildBasicAgent.groovy
index 421920f..99f7f49 100644
--- a/slider-core/src/test/groovy/org/apache/slider/providers/agent/TestBuildBasicAgent.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/providers/agent/TestBuildBasicAgent.groovy
@@ -69,9 +69,8 @@
@Test
public void testBuildMultipleRoles() throws Throwable {
- def clustername = "test_build_basic_agent"
- createMiniCluster(
- clustername,
+ String clustername = createMiniCluster(
+ "",
configuration,
1,
1,
@@ -223,7 +222,98 @@
true, false,
false)
}
+
+ @Test
+ public void testUpdateBasicAgent() throws Throwable {
+ String clustername = createMiniCluster(
+ "",
+ configuration,
+ 1,
+ 1,
+ 1,
+ true,
+ false)
+
+ def master = "hbase-master"
+ def rs = "hbase-rs"
+ ServiceLauncher<SliderClient> launcher = buildAgentCluster(clustername,
+ [
+ (ROLE_NODE): 5,
+ (master): 1,
+ (rs): 5
+ ],
+ [
+ ARG_OPTION, CONTROLLER_URL, "http://localhost",
+ ARG_OPTION, PACKAGE_PATH, ".",
+ ARG_OPTION, APP_DEF, "file://" + getAppDef().absolutePath,
+ ARG_OPTION, AGENT_CONF, "file://" + getAgentConf().absolutePath,
+ ARG_COMP_OPT, master, SCRIPT_PATH, "agent/scripts/agent.py",
+ ARG_COMP_OPT, rs, SCRIPT_PATH, "agent/scripts/agent.py",
+ ARG_RES_COMP_OPT, master, ResourceKeys.COMPONENT_PRIORITY, "2",
+ ARG_RES_COMP_OPT, rs, ResourceKeys.COMPONENT_PRIORITY, "3",
+ ARG_COMP_OPT, master, SERVICE_NAME, "HBASE",
+ ARG_COMP_OPT, rs, SERVICE_NAME, "HBASE",
+ ARG_COMP_OPT, master, AgentKeys.APP_HOME, "/share/hbase/hbase-0.96.1-hadoop2",
+ ARG_COMP_OPT, rs, AgentKeys.APP_HOME, "/share/hbase/hbase-0.96.1-hadoop2",
+ ARG_COMP_OPT, ROLE_NODE, SCRIPT_PATH, "agent/scripts/agent.py",
+ ARG_RES_COMP_OPT, ROLE_NODE, ResourceKeys.COMPONENT_PRIORITY, "1",
+ ],
+ true, false,
+ false)
+ def instanceD = launcher.service.loadPersistedClusterDescription(
+ clustername)
+ dumpClusterDescription("$clustername:", instanceD)
+ def resource = instanceD.getResourceOperations()
+
+ def agentComponent = resource.getMandatoryComponent(ROLE_NODE)
+ agentComponent.getMandatoryOption(ResourceKeys.COMPONENT_PRIORITY)
+
+ def masterC = resource.getMandatoryComponent(master)
+ assert "2" == masterC.getMandatoryOption(ResourceKeys.COMPONENT_PRIORITY)
+
+ def rscomponent = resource.getMandatoryComponent(rs)
+ assert "5" == rscomponent.getMandatoryOption(ResourceKeys.COMPONENT_INSTANCES)
+
+ // change master priority and rs instances through update action
+ ServiceLauncher<SliderClient> launcher2 = updateAgentCluster(clustername,
+ [
+ (ROLE_NODE): 5,
+ (master): 1,
+ (rs): 6
+ ],
+ [
+ ARG_OPTION, CONTROLLER_URL, "http://localhost",
+ ARG_OPTION, PACKAGE_PATH, ".",
+ ARG_OPTION, APP_DEF, "file://" + getAppDef().absolutePath,
+ ARG_OPTION, AGENT_CONF, "file://" + getAgentConf().absolutePath,
+ ARG_COMP_OPT, master, SCRIPT_PATH, "agent/scripts/agent.py",
+ ARG_COMP_OPT, rs, SCRIPT_PATH, "agent/scripts/agent.py",
+ ARG_RES_COMP_OPT, master, ResourceKeys.COMPONENT_PRIORITY, "4",
+ ARG_RES_COMP_OPT, rs, ResourceKeys.COMPONENT_PRIORITY, "3",
+ ARG_COMP_OPT, master, SERVICE_NAME, "HBASE",
+ ARG_COMP_OPT, rs, SERVICE_NAME, "HBASE",
+ ARG_COMP_OPT, master, AgentKeys.APP_HOME, "/share/hbase/hbase-0.96.1-hadoop2",
+ ARG_COMP_OPT, rs, AgentKeys.APP_HOME, "/share/hbase/hbase-0.96.1-hadoop2",
+ ARG_COMP_OPT, ROLE_NODE, SCRIPT_PATH, "agent/scripts/agent.py",
+ ARG_RES_COMP_OPT, ROLE_NODE, ResourceKeys.COMPONENT_PRIORITY, "1",
+ ],
+ true)
+ def instanceDef = launcher.service.loadPersistedClusterDescription(
+ clustername)
+ dumpClusterDescription("$clustername:", instanceDef)
+ def resource2 = instanceDef.getResourceOperations()
+
+ def agentComponent2 = resource2.getMandatoryComponent(ROLE_NODE)
+ agentComponent2.getMandatoryOption(ResourceKeys.COMPONENT_PRIORITY)
+
+ def masterC2 = resource2.getMandatoryComponent(master)
+ assert "4" == masterC2.getMandatoryOption(ResourceKeys.COMPONENT_PRIORITY)
+
+ def rscomponent2 = resource2.getMandatoryComponent(rs)
+ assert "6" == rscomponent2.getMandatoryOption(ResourceKeys.COMPONENT_INSTANCES)
+ }
+
public AggregateConf loadInstanceDefinition(String name) {
def cluster4
def sliderFS = createSliderFileSystem()
@@ -235,10 +325,9 @@
}
@Test
- public void testAgentArgs() throws Throwable {
- def clustername = "test_good_agent_args"
- createMiniCluster(
- clustername,
+ public void testGoodAgentArgs() throws Throwable {
+ String clustername = createMiniCluster(
+ "",
configuration,
1,
1,
@@ -253,7 +342,7 @@
[
ARG_OPTION, CONTROLLER_URL, "http://localhost",
ARG_PACKAGE, ".",
- ARG_OPTION, APP_DEF, "file://" + getAppDef().absolutePath,
+ ARG_OPTION, APP_DEF, "file://" + appDef.absolutePath,
ARG_RESOURCES, TEST_FILES + "good/resources.json",
ARG_TEMPLATE, TEST_FILES + "good/appconf.json"
],
@@ -268,9 +357,8 @@
@Test
public void testBadAgentArgs() throws Throwable {
- def clustername = "test_bad_agent_args"
- createMiniCluster(
- clustername,
+ String clustername = createMiniCluster(
+ "",
configuration,
1,
1,
@@ -360,10 +448,8 @@
@Test
public void testTemplateArgs() throws Throwable {
-
- def clustername = "test_build_template_args"
- createMiniCluster(
- clustername,
+ String clustername = createMiniCluster(
+ "",
configuration,
1,
1,
@@ -388,10 +474,8 @@
@Test
public void testBadTemplates() throws Throwable {
-
- def clustername = "test_bad_template_args"
- createMiniCluster(
- clustername,
+ String clustername = createMiniCluster(
+ "",
configuration,
1,
1,
diff --git a/slider-core/src/test/groovy/org/apache/slider/registry/curator/TestRegistryRestResources.groovy b/slider-core/src/test/groovy/org/apache/slider/registry/curator/TestRegistryRestResources.groovy
index 4cc0f08..1a1e5aa 100644
--- a/slider-core/src/test/groovy/org/apache/slider/registry/curator/TestRegistryRestResources.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/registry/curator/TestRegistryRestResources.groovy
@@ -50,6 +50,7 @@
public static final String REGISTRY_URI = RestPaths.SLIDER_PATH_REGISTRY;
public static final String WADL = "vnd.sun.wadl+xml"
+ public static final String CLUSTERNAME = "testregistryws"
private String id(String instanceName) {
@@ -64,7 +65,7 @@
@Test
public void testRestURIs() throws Throwable {
- def clustername = "test_registryws"
+ def clustername = CLUSTERNAME
createMiniCluster(
clustername,
configuration,
@@ -143,7 +144,8 @@
webResource = client.resource(
appendToURL(registry_url,
- "${RestPaths.REGISTRY_SERVICE}/${SliderKeys.APP_TYPE}/"+id("test_registryws")));
+ "${RestPaths.REGISTRY_SERVICE}/${SliderKeys.APP_TYPE}/"+id(
+ clustername)));
service = webResource.type(MediaType.APPLICATION_JSON)
.get(CuratorServiceInstance.class);
validateService(service)
@@ -164,7 +166,7 @@
try {
webResource = client.resource(appendToURL(registry_url,
- "${RestPaths.REGISTRY_SERVICE}/${SliderKeys.APP_TYPE}/test_registryws-99"));
+ "${RestPaths.REGISTRY_SERVICE}/${SliderKeys.APP_TYPE}/testregistryws99"));
service = webResource.type(MediaType.APPLICATION_JSON)
.get(CuratorServiceInstance.class);
@@ -188,6 +190,6 @@
private void validateService(CuratorServiceInstance service) {
assert service.name.equals(SliderKeys.APP_TYPE)
assert service.serviceType == ServiceType.DYNAMIC
- assert service.id.contains("test_registryws")
+ assert service.id.contains(CLUSTERNAME)
}
}
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/actions/TestActions.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/actions/TestActions.groovy
new file mode 100644
index 0000000..7e03e7b
--- /dev/null
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/actions/TestActions.groovy
@@ -0,0 +1,232 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.actions
+
+import groovy.util.logging.Slf4j
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.service.ServiceOperations
+import org.apache.slider.server.appmaster.SliderAppMaster
+import org.apache.slider.server.appmaster.state.AppState
+import org.apache.slider.server.services.workflow.ServiceThreadFactory
+import org.apache.slider.server.services.workflow.WorkflowExecutorService
+import org.junit.After
+import org.junit.Before
+import org.junit.Test
+
+import java.util.concurrent.ExecutorService
+import java.util.concurrent.Executors
+import java.util.concurrent.TimeUnit
+import java.util.concurrent.atomic.AtomicBoolean
+import java.util.concurrent.atomic.AtomicLong
+
+@Slf4j
+//@CompileStatic
+class TestActions {
+
+ QueueService queues;
+ WorkflowExecutorService<ExecutorService> executorService;
+
+
+ @Before
+ void createService() {
+ queues = new QueueService();
+
+ def conf = new Configuration()
+ queues.init(conf)
+
+ queues.start();
+
+ executorService = new WorkflowExecutorService<>("AmExecutor",
+ Executors.newCachedThreadPool(
+ new ServiceThreadFactory("AmExecutor", true)));
+
+ executorService.init(conf)
+ executorService.start();
+ }
+
+ @After
+ void destroyService() {
+ ServiceOperations.stop(executorService);
+ ServiceOperations.stop(queues);
+ }
+
+ @Test
+ public void testBasicService() throws Throwable {
+ queues.start();
+ }
+
+ @Test
+ public void testDelayLogic() throws Throwable {
+ ActionNoteExecuted action = new ActionNoteExecuted("", 1000)
+ long now = System.currentTimeMillis();
+
+ def delay = action.getDelay(TimeUnit.MILLISECONDS)
+ assert delay >= 800
+ assert delay <= 1800
+
+ ActionNoteExecuted a2 = new ActionNoteExecuted("a2", 10000)
+ assert action.compareTo(a2) < 0
+ assert a2.compareTo(action) > 0
+ assert action.compareTo(action)== 0
+
+ }
+
+ @Test
+ public void testActionDelayedExecutorTermination() throws Throwable {
+ long start = System.currentTimeMillis()
+
+ ActionStopQueue stopAction = new ActionStopQueue(1000);
+ queues.scheduledActions.add(stopAction);
+ queues.run();
+ AsyncAction take = queues.actionQueue.take();
+ assert take == stopAction
+ long stop = System.currentTimeMillis();
+ assert stop - start > 500
+ assert stop - start < 1500
+ }
+
+ @Test
+ public void testImmediateQueue() throws Throwable {
+ ActionNoteExecuted noteExecuted = new ActionNoteExecuted("executed", 0)
+ queues.put(noteExecuted)
+ queues.put(new ActionStopQueue(0))
+ QueueExecutor ex = new QueueExecutor(queues)
+ ex.run();
+ assert queues.actionQueue.empty
+ assert noteExecuted.executed.get()
+ }
+
+ @Test
+ public void testActionOrdering() throws Throwable {
+
+ ActionNoteExecuted note1 = new ActionNoteExecuted("note1", 500)
+ def stop = new ActionStopQueue(1500)
+ ActionNoteExecuted note2 = new ActionNoteExecuted("note2", 800)
+
+ List<AsyncAction> actions = [note1, stop, note2]
+ Collections.sort(actions)
+ assert actions[0] == note1
+ assert actions[1] == note2
+ assert actions[2] == stop
+ }
+
+ @Test
+ public void testDelayedQueueWithReschedule() throws Throwable {
+
+ ActionNoteExecuted note1 = new ActionNoteExecuted("note1", 500)
+ def stop = new ActionStopQueue(1500)
+ ActionNoteExecuted note2 = new ActionNoteExecuted("note2", 800)
+
+ assert note2.compareTo(stop) < 0
+ assert note1.nanos < note2.nanos
+ assert note2.nanos < stop.nanos
+ queues.schedule(note1)
+ queues.schedule(note2)
+ queues.schedule(stop)
+ // async to sync expected to run in order
+ runQueuesToCompletion()
+ assert note1.executed.get()
+ assert note2.executed.get()
+ }
+
+ public void runQueuesToCompletion() {
+ queues.run();
+ assert queues.scheduledActions.empty
+ assert !queues.actionQueue.empty
+ QueueExecutor ex = new QueueExecutor(queues)
+ ex.run();
+ // flush all stop commands from the queue
+ queues.flushActionQueue(ActionStopQueue.class)
+
+ assert queues.actionQueue.empty
+ }
+
+ @Test
+ public void testRenewedActionFiresOnceAtLeast() throws Throwable {
+ ActionNoteExecuted note1 = new ActionNoteExecuted("note1", 500)
+ RenewingAction renewer = new RenewingAction(
+ note1,
+ 500,
+ 100,
+ TimeUnit.MILLISECONDS,
+ 3)
+ queues.schedule(renewer);
+ def stop = new ActionStopQueue(4, TimeUnit.SECONDS)
+ queues.schedule(stop);
+ // this runs all the delayed actions FIRST, so can't be used
+ // to play tricks of renewing actions ahead of the stop action
+ runQueuesToCompletion()
+ assert renewer.executionCount == 1
+ assert note1.executionCount == 1
+ // assert the renewed item is back in
+ assert queues.scheduledActions.contains(renewer)
+ }
+
+
+ @Test
+ public void testRenewingActionOperations() throws Throwable {
+ ActionNoteExecuted note1 = new ActionNoteExecuted("note1", 500)
+ RenewingAction renewer = new RenewingAction(
+ note1,
+ 100,
+ 100,
+ TimeUnit.MILLISECONDS,
+ 3)
+ queues.renewing("note", renewer)
+ assert queues.removeRenewingAction("note")
+ queues.stop()
+ queues.waitForServiceToStop(10000)
+ }
+
+ public class ActionNoteExecuted extends AsyncAction {
+ public final AtomicBoolean executed = new AtomicBoolean(false);
+ public final AtomicLong executionTimeNanos = new AtomicLong()
+ private final AtomicLong executionCount = new AtomicLong()
+
+ public ActionNoteExecuted(String text, int delay) {
+ super(text, delay);
+ }
+
+ @Override
+ public void execute(
+ SliderAppMaster appMaster,
+ QueueAccess queueService,
+ AppState appState) throws Exception {
+ log.info("Executing $name");
+ executed.set(true);
+ executionTimeNanos.set(System.nanoTime())
+ executionCount.incrementAndGet()
+ log.info(this.toString())
+
+ synchronized (this) {
+ this.notify();
+ }
+ }
+
+ @Override
+ String toString() {
+ return super.toString() +
+ " executed=${executed.get()}; count=${executionCount.get()};"
+ }
+
+ long getExecutionCount() {
+ return executionCount.get()
+ }
+ }
+}
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestAppStateContainerFailure.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateContainerFailure.groovy
similarity index 77%
rename from slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestAppStateContainerFailure.groovy
rename to slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateContainerFailure.groovy
index 9c17763..068b876 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestAppStateContainerFailure.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateContainerFailure.groovy
@@ -21,8 +21,12 @@
import groovy.transform.CompileStatic
import groovy.util.logging.Slf4j
import org.apache.hadoop.yarn.api.records.ContainerId
+import org.apache.slider.api.ResourceKeys
+import org.apache.slider.core.conf.AggregateConf
+import org.apache.slider.core.conf.MapOperations
import org.apache.slider.core.exceptions.SliderException
import org.apache.slider.core.exceptions.TriggerClusterTeardownException
+import org.apache.slider.server.appmaster.actions.ResetFailureWindow
import org.apache.slider.server.appmaster.model.mock.BaseMockAppStateTest
import org.apache.slider.server.appmaster.model.mock.MockRoles
import org.apache.slider.server.appmaster.model.mock.MockYarnEngine
@@ -34,12 +38,12 @@
*/
@CompileStatic
@Slf4j
-class TestAppStateContainerFailure extends BaseMockAppStateTest
+class TestMockAppStateContainerFailure extends BaseMockAppStateTest
implements MockRoles {
@Override
String getTestName() {
- return "TestAppStateContainerFailure"
+ return "TestMockAppStateContainerFailure"
}
/**
@@ -52,6 +56,15 @@
return new MockYarnEngine(8000, 4)
}
+ @Override
+ AggregateConf buildInstanceDefinition() {
+ def aggregateConf = super.buildInstanceDefinition()
+ def globalOptions = aggregateConf.resourceOperations.globalOptions
+ globalOptions.put(ResourceKeys.CONTAINER_FAILURE_THRESHOLD, "10")
+
+ return aggregateConf
+ }
+
@Test
public void testShortLivedFail() throws Throwable {
@@ -153,7 +166,7 @@
ContainerId cid = ids[0]
log.info("$i instance $instances[0] $cid")
assert cid
- appState.onNodeManagerContainerStartFailed(cid, new SliderException("oops"))
+ appState.onNodeManagerContainerStartFailed(cid, new SliderException("failure #${i}"))
AppState.NodeCompletionResult result = appState.onCompletedNode(containerStatus(cid))
assert result.containerFailed
}
@@ -163,4 +176,33 @@
}
}
+
+ @Test
+ public void testFailureWindow() throws Throwable {
+
+ ResetFailureWindow resetter = new ResetFailureWindow();
+
+ // initial reset
+ resetter.execute(null, null, appState)
+
+ role0Status.desired = 1
+ for (int i = 0; i < 100; i++) {
+ resetter.execute(null, null, appState)
+ List<RoleInstance> instances = createAndSubmitNodes()
+ assert instances.size() == 1
+
+ List<ContainerId> ids = extractContainerIds(instances, 0)
+
+ ContainerId cid = ids[0]
+ log.info("$i instance $instances[0] $cid")
+ assert cid
+ appState.onNodeManagerContainerStartFailed(
+ cid,
+ new SliderException("failure #${i}"))
+ AppState.NodeCompletionResult result = appState.onCompletedNode(
+ containerStatus(cid))
+ assert result.containerFailed
+ }
+ }
+
}
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestAppStateDynamicRoles.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateDynamicRoles.groovy
similarity index 88%
rename from slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestAppStateDynamicRoles.groovy
rename to slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateDynamicRoles.groovy
index 6e387d8..136e1ea 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestAppStateDynamicRoles.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateDynamicRoles.groovy
@@ -26,9 +26,10 @@
import org.apache.slider.server.appmaster.model.mock.MockRecordFactory
import org.apache.slider.server.appmaster.model.mock.MockRoles
import org.apache.slider.server.appmaster.model.mock.MockYarnEngine
-import org.apache.slider.server.appmaster.state.AbstractRMOperation
+import org.apache.slider.server.appmaster.operations.AbstractRMOperation
import org.apache.slider.server.appmaster.state.AppState
import org.apache.slider.server.appmaster.state.RoleInstance
+import org.apache.slider.server.appmaster.state.SimpleReleaseSelector
import org.junit.Test
/**
@@ -36,12 +37,12 @@
*/
@CompileStatic
@Slf4j
-class TestAppStateDynamicRoles extends BaseMockAppStateTest
+class TestMockAppStateDynamicRoles extends BaseMockAppStateTest
implements MockRoles {
@Override
String getTestName() {
- return "TestAppStateDynamicRoles"
+ return "TestMockAppStateDynamicRoles"
}
/**
@@ -72,11 +73,13 @@
appState.buildInstance(
instance,
+ new Configuration(),
new Configuration(false),
factory.ROLES,
fs,
historyPath,
- null, null)
+ null,
+ null, new SimpleReleaseSelector())
}
@Test
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestFlexDynamicRoles.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateFlexDynamicRoles.groovy
similarity index 94%
rename from slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestFlexDynamicRoles.groovy
rename to slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateFlexDynamicRoles.groovy
index 1693365..5c9dce9 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestFlexDynamicRoles.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateFlexDynamicRoles.groovy
@@ -30,6 +30,7 @@
import org.apache.slider.server.appmaster.model.mock.MockRoles
import org.apache.slider.server.appmaster.model.mock.MockYarnEngine
import org.apache.slider.server.appmaster.state.AppState
+import org.apache.slider.server.appmaster.state.SimpleReleaseSelector
import org.apache.slider.server.avro.RoleHistoryWriter
import org.junit.Test
@@ -38,12 +39,12 @@
*/
@CompileStatic
@Slf4j
-class TestFlexDynamicRoles extends BaseMockAppStateTest
+class TestMockAppStateFlexDynamicRoles extends BaseMockAppStateTest
implements MockRoles {
@Override
String getTestName() {
- return "TestAppStateDynamicRoles"
+ return "TestMockAppStateFlexDynamicRoles"
}
/**
@@ -73,11 +74,12 @@
appState.buildInstance(instance,
+ new Configuration(),
new Configuration(false),
factory.ROLES,
fs,
historyPath,
- null, null)
+ null, null, new SimpleReleaseSelector())
}
@@ -176,11 +178,12 @@
appState.setContainerLimits(RM_MAX_RAM, RM_MAX_CORES)
appState.buildInstance(
factory.newInstanceDefinition(0, 0, 0),
+ new Configuration(),
new Configuration(false),
factory.ROLES,
fs,
historyPath2,
- null, null)
+ null, null, new SimpleReleaseSelector())
historyWriter.read(fs, history, appState.roleHistory)
}
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateFlexing.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateFlexing.groovy
new file mode 100644
index 0000000..a7bf068
--- /dev/null
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateFlexing.groovy
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.model.appstate
+
+import groovy.util.logging.Slf4j
+import org.apache.hadoop.yarn.api.records.Container
+import org.apache.slider.server.appmaster.model.mock.BaseMockAppStateTest
+import org.apache.slider.server.appmaster.model.mock.MockRoles
+import org.apache.slider.server.appmaster.operations.AbstractRMOperation
+import org.apache.slider.server.appmaster.state.AppState
+import org.apache.slider.server.appmaster.state.ContainerAssignment
+import org.apache.slider.server.appmaster.state.RoleInstance
+import org.junit.Test
+
+@Slf4j
+class TestMockAppStateFlexing extends BaseMockAppStateTest implements MockRoles {
+
+ @Override
+ String getTestName() {
+ return "TestMockAppStateFlexing"
+ }
+
+ @Test
+ public void testFlexDuringLaunchPhase() throws Throwable {
+ role0Status.desired = 1
+
+ List<AbstractRMOperation> ops = appState.reviewRequestAndReleaseNodes()
+ List<Container> allocations = engine.execute(ops)
+ List<ContainerAssignment> assignments = [];
+ List<AbstractRMOperation> releases = []
+ appState.onContainersAllocated(allocations, assignments, releases)
+ assert assignments.size() == 1
+ ContainerAssignment assigned = assignments[0]
+ Container target = assigned.container
+ RoleInstance ri = roleInstance(assigned)
+
+ ops = appState.reviewRequestAndReleaseNodes()
+ assert ops.empty
+
+ //now this is the start point.
+ appState.containerStartSubmitted(target, ri);
+
+ ops = appState.reviewRequestAndReleaseNodes()
+ assert ops.empty
+
+ RoleInstance ri2 = appState.innerOnNodeManagerContainerStarted(target.id)
+ }
+
+ @Test
+ public void testFlexBeforeAllocationPhase() throws Throwable {
+ role0Status.desired = 1
+
+ List<AbstractRMOperation> ops = appState.reviewRequestAndReleaseNodes()
+ assert !ops.empty
+ List<AbstractRMOperation> ops2 = appState.reviewRequestAndReleaseNodes()
+ assert ops2.empty
+ }
+
+
+ @Test
+ public void testFlexDownTwice() throws Throwable {
+ int r0 = 6
+ int r1 = 0
+ int r2 = 0
+ role0Status.desired = r0
+ role1Status.desired = r1
+ role2Status.desired = r2
+ List<RoleInstance> instances = createAndStartNodes()
+
+ int clusterSize = r0 + r1 + r2
+ assert instances.size() == clusterSize
+ log.info("shrinking cluster")
+ r0 = 4
+ role0Status.desired = r0
+ List<AppState.NodeCompletionResult> completionResults = []
+ instances = createStartAndStopNodes(completionResults)
+ assert instances.size() == 0
+ // assert two nodes were released
+ assert completionResults.size() == 2
+
+ // no-op review
+ completionResults = []
+ instances = createStartAndStopNodes(completionResults)
+ assert instances.size() == 0
+ // assert two nodes were released
+ assert completionResults.size() == 0
+
+
+ // now shrink again
+ role0Status.desired = r0 = 1
+ completionResults = []
+ instances = createStartAndStopNodes(completionResults)
+ assert instances.size() == 0
+ // assert two nodes were released
+ assert completionResults.size() == 3
+
+ }
+
+
+}
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockRMOperations.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateRMOperations.groovy
similarity index 83%
rename from slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockRMOperations.groovy
rename to slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateRMOperations.groovy
index 168ac9f..f8e852e 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockRMOperations.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateRMOperations.groovy
@@ -25,6 +25,10 @@
import org.apache.slider.server.appmaster.model.mock.MockFactory
import org.apache.slider.server.appmaster.model.mock.MockRMOperationHandler
import org.apache.slider.server.appmaster.model.mock.MockRoles
+import org.apache.slider.server.appmaster.operations.AbstractRMOperation
+import org.apache.slider.server.appmaster.operations.ContainerReleaseOperation
+import org.apache.slider.server.appmaster.operations.ContainerRequestOperation
+import org.apache.slider.server.appmaster.operations.RMOperationHandler
import org.apache.slider.server.appmaster.state.*
import org.junit.Test
@@ -32,11 +36,11 @@
import static org.apache.slider.server.appmaster.state.ContainerPriority.extractRole
@Slf4j
-class TestMockRMOperations extends BaseMockAppStateTest implements MockRoles {
+class TestMockAppStateRMOperations extends BaseMockAppStateTest implements MockRoles {
@Override
String getTestName() {
- return "TestMockRMOperations"
+ return "TestMockAppStateRMOperations"
}
@Test
@@ -173,41 +177,4 @@
assert ri3 == null
}
- @Test
- public void testFlexDuringLaunchPhase() throws Throwable {
- role0Status.desired = 1
-
- List<AbstractRMOperation> ops = appState.reviewRequestAndReleaseNodes()
- List<Container> allocations = engine.execute(
- ops)
- List<ContainerAssignment> assignments = [];
- List<AbstractRMOperation> releases = []
- appState.onContainersAllocated(allocations, assignments, releases)
- assert assignments.size() == 1
- ContainerAssignment assigned = assignments[0]
- Container target = assigned.container
- RoleInstance ri = roleInstance(assigned)
-
- ops = appState.reviewRequestAndReleaseNodes()
- assert ops.empty
-
- //now this is the start point.
- appState.containerStartSubmitted(target, ri);
-
- ops = appState.reviewRequestAndReleaseNodes()
- assert ops.empty
-
- RoleInstance ri2 = appState.innerOnNodeManagerContainerStarted(target.id)
- }
-
- @Test
- public void testFlexBeforeAllocationPhase() throws Throwable {
- role0Status.desired = 1
-
- List<AbstractRMOperation> ops = appState.reviewRequestAndReleaseNodes()
- assert !ops.empty
- List<AbstractRMOperation> ops2 = appState.reviewRequestAndReleaseNodes()
- assert ops2.empty
- }
-
}
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestAppStateRebuildOnAMRestart.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateRebuildOnAMRestart.groovy
similarity index 93%
rename from slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestAppStateRebuildOnAMRestart.groovy
rename to slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateRebuildOnAMRestart.groovy
index 190e927..c2783f3 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestAppStateRebuildOnAMRestart.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateRebuildOnAMRestart.groovy
@@ -26,6 +26,7 @@
import org.apache.slider.server.appmaster.model.mock.BaseMockAppStateTest
import org.apache.slider.server.appmaster.model.mock.MockRecordFactory
import org.apache.slider.server.appmaster.model.mock.MockRoles
+import org.apache.slider.server.appmaster.operations.AbstractRMOperation
import org.apache.slider.server.appmaster.state.*
import org.junit.Test
@@ -34,12 +35,12 @@
*/
@CompileStatic
@Slf4j
-class TestAppStateRebuildOnAMRestart extends BaseMockAppStateTest
+class TestMockAppStateRebuildOnAMRestart extends BaseMockAppStateTest
implements MockRoles {
@Override
String getTestName() {
- return "TestAppStateRebuildOnAMRestart"
+ return "TestMockAppStateRebuildOnAMRestart"
}
@Test
@@ -71,11 +72,13 @@
//and rebuild
appState.buildInstance(
factory.newInstanceDefinition(r0, r1, r2),
+ new Configuration(),
new Configuration(false),
factory.ROLES,
fs,
historyPath,
- containers, null)
+ containers,
+ null, new SimpleReleaseSelector())
assert appState.getStartedCountainerCount() == clusterSize
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestAppStateRolePlacement.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateRolePlacement.groovy
similarity index 91%
rename from slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestAppStateRolePlacement.groovy
rename to slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateRolePlacement.groovy
index fba1ea0..17ebc31 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestAppStateRolePlacement.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateRolePlacement.groovy
@@ -24,6 +24,9 @@
import org.apache.hadoop.yarn.client.api.AMRMClient
import org.apache.slider.server.appmaster.model.mock.BaseMockAppStateTest
import org.apache.slider.server.appmaster.model.mock.MockRoles
+import org.apache.slider.server.appmaster.operations.AbstractRMOperation
+import org.apache.slider.server.appmaster.operations.ContainerReleaseOperation
+import org.apache.slider.server.appmaster.operations.ContainerRequestOperation
import org.apache.slider.server.appmaster.state.*
import org.junit.Test
@@ -35,12 +38,12 @@
*/
@CompileStatic
@Slf4j
-class TestAppStateRolePlacement extends BaseMockAppStateTest
+class TestMockAppStateRolePlacement extends BaseMockAppStateTest
implements MockRoles {
@Override
String getTestName() {
- return "TestAppStateRolePlacement"
+ return "TestMockAppStateRolePlacement"
}
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestAppStateRoleRelease.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateRoleRelease.groovy
similarity index 93%
rename from slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestAppStateRoleRelease.groovy
rename to slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateRoleRelease.groovy
index f087a30..addfaa5 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestAppStateRoleRelease.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateRoleRelease.groovy
@@ -24,7 +24,7 @@
import org.apache.slider.server.appmaster.model.mock.BaseMockAppStateTest
import org.apache.slider.server.appmaster.model.mock.MockRoles
import org.apache.slider.server.appmaster.model.mock.MockYarnEngine
-import org.apache.slider.server.appmaster.state.AbstractRMOperation
+import org.apache.slider.server.appmaster.operations.AbstractRMOperation
import org.apache.slider.server.appmaster.state.RoleInstance
import org.junit.Test
@@ -33,12 +33,12 @@
*/
@CompileStatic
@Slf4j
-class TestAppStateRoleRelease extends BaseMockAppStateTest
+class TestMockAppStateRoleRelease extends BaseMockAppStateTest
implements MockRoles {
@Override
String getTestName() {
- return "TestAppStateRolePlacement"
+ return "TestMockAppStateRoleRelease"
}
/**
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestContainerResourceAllocations.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockContainerResourceAllocations.groovy
similarity index 93%
rename from slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestContainerResourceAllocations.groovy
rename to slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockContainerResourceAllocations.groovy
index a0b1100..73d40ee 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestContainerResourceAllocations.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockContainerResourceAllocations.groovy
@@ -26,8 +26,8 @@
import org.apache.slider.core.conf.ConfTreeOperations
import org.apache.slider.server.appmaster.model.mock.BaseMockAppStateTest
import org.apache.slider.server.appmaster.model.mock.MockRoles
-import org.apache.slider.server.appmaster.state.AbstractRMOperation
-import org.apache.slider.server.appmaster.state.ContainerRequestOperation
+import org.apache.slider.server.appmaster.operations.AbstractRMOperation
+import org.apache.slider.server.appmaster.operations.ContainerRequestOperation
import org.junit.Test
/**
@@ -35,11 +35,11 @@
*/
@CompileStatic
@Slf4j
-class TestContainerResourceAllocations extends BaseMockAppStateTest {
+class TestMockContainerResourceAllocations extends BaseMockAppStateTest {
@Override
String getTestName() {
- "TestContainerResourceAllocations"
+ "TestMockContainerResourceAllocations"
}
@Test
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestFindNodesForRelease.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestFindNodesForRelease.groovy
deleted file mode 100644
index 92915dd..0000000
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestFindNodesForRelease.groovy
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.slider.server.appmaster.model.history
-
-import groovy.transform.CompileStatic
-import groovy.util.logging.Slf4j
-import org.apache.slider.server.appmaster.model.mock.BaseMockAppStateTest
-import org.apache.slider.server.appmaster.model.mock.MockFactory
-import org.apache.slider.server.appmaster.state.NodeInstance
-import org.apache.slider.server.appmaster.state.NodeMap
-import org.junit.Before
-import org.junit.Test
-
-@Slf4j
-@CompileStatic
-class TestFindNodesForRelease extends BaseMockAppStateTest {
-
-
- @Override
- String getTestName() {
- return "TestFindNodesForRelease"
- }
- NodeInstance age1Active4 = nodeInstance(1, 4, 0, 0)
- NodeInstance age2Active2 = nodeInstance(2, 2, 0, 0)
- NodeInstance age3Active0 = nodeInstance(3, 0, 0, 0)
- NodeInstance age4Active1 = nodeInstance(4, 1, 0, 0)
- NodeInstance empty = new NodeInstance("empty", MockFactory.ROLE_COUNT)
-
- List<NodeInstance> nodes = [age2Active2, age4Active1, age1Active4, age3Active0]
- NodeMap nodeMap = new NodeMap(MockFactory.ROLE_COUNT);
-
-
- @Before
- public void setupNodeMap() {
- nodeMap.insert(nodes)
- }
-
- private void assertReleased(
- int count,
- List<NodeInstance> expected,
- int role = 0) {
- List<NodeInstance> released = nodeMap.findNodesForRelease(role, count)
- assertListEquals(released, expected)
- }
- private void assertReleased(
- List<NodeInstance> expected,
- int role = 0) {
- List<NodeInstance> released = nodeMap.findNodesForRelease(role, expected.size())
- assertListEquals(released, expected)
- }
-
- @Test
- public void testListActiveNodes() throws Throwable {
- assertListEquals(nodeMap.listActiveNodes(0),
- [age1Active4,age2Active2, age4Active1])
- }
-
- @Test
- public void testReleaseMinus1() throws Throwable {
- try {
- nodeMap.findNodesForRelease(0, -1)
- fail("Expected an exception")
- } catch (IllegalArgumentException e) {
- }
- }
- @Test
- public void testReleaseO() throws Throwable {
- assertReleased(0, [])
- }
-
- @Test
- public void testRelease1() throws Throwable {
- assertReleased(1, [age1Active4])
- }
-
- @Test
- public void testRelease2() throws Throwable {
- assertReleased(2, [age1Active4, age1Active4])
- }
-
- @Test
- public void testRelease3() throws Throwable {
- assertReleased(3, [age1Active4, age1Active4, age1Active4 ])
- }
-
- @Test
- public void testRelease4() throws Throwable {
- assertReleased(4, [age1Active4, age1Active4, age1Active4 , age2Active2])
- }
-
- @Test
- public void testRelease5() throws Throwable {
- assertReleased([age1Active4, age1Active4, age1Active4 , age2Active2, age4Active1])
- }
-
- @Test
- public void testRelease6() throws Throwable {
- assertReleased(
- [age1Active4, age1Active4, age1Active4 , age2Active2, age4Active1, age1Active4])
- }
-
- @Test
- public void testRelease7() throws Throwable {
- assertReleased(
- [age1Active4, age1Active4, age1Active4 , age2Active2, age4Active1,
- age1Active4, age2Active2])
- }
-
- @Test
- public void testRelease8() throws Throwable {
- assertReleased(8,
- [age1Active4, age1Active4, age1Active4 , age2Active2, age4Active1,
- age1Active4, age2Active2])
- }
-
- @Test
- public void testPurgeInactiveTime3() throws Throwable {
- assert nodeMap.purgeUnusedEntries(3) == 0;
- }
-
- @Test
- public void testPurgeInactiveTime4() throws Throwable {
- assert nodeMap.purgeUnusedEntries(4) == 1;
- }
- @Test
- public void testPurgeInactiveTime5() throws Throwable {
- assert nodeMap.purgeUnusedEntries(5) == 1;
- }
-
-}
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryContainerEvents.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryContainerEvents.groovy
index 795b48f..340e72d 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryContainerEvents.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryContainerEvents.groovy
@@ -114,12 +114,12 @@
RoleStatus roleStatus = new RoleStatus(provRole)
//verify it is empty
- assert roleHistory.findNodesForRelease(role, 1).isEmpty()
+ assert roleHistory.listActiveNodes(role).empty
AMRMClient.ContainerRequest request =
roleHistory.requestNode(roleStatus, resource);
- List<String> nodes = request.getNodes()
+ List<String> nodes = request.nodes
assert nodes == null
//pick an idle host
@@ -128,7 +128,7 @@
//build a container
MockContainer container = factory.newContainer()
container.nodeId = new MockNodeId(hostname, 0)
- container.priority = request.getPriority()
+ container.priority = request.priority
roleHistory.onContainerAssigned(container);
NodeMap nodemap = roleHistory.cloneNodemap();
@@ -147,10 +147,11 @@
assert roleEntry.live == 1
// now pick that instance to destroy
+ List<NodeInstance> activeNodes = roleHistory.listActiveNodes(role)
- List<NodeInstance> forRelease = roleHistory.findNodesForRelease(role, 1)
- assert forRelease.size() == 1
- NodeInstance target = forRelease[0]
+
+ assert activeNodes.size() == 1
+ NodeInstance target = activeNodes[0]
assert target == allocated
roleHistory.onContainerReleaseSubmitted(container);
assert roleEntry.releasing == 1
@@ -158,19 +159,19 @@
assert roleEntry.active == 0
// release completed
- roleHistory.onReleaseCompleted(container)
+ roleHistory.onReleaseCompleted(container, true)
assert roleEntry.releasing == 0
assert roleEntry.live == 0
assert roleEntry.active == 0
// verify it is empty
- assert roleHistory.findNodesForRelease(role, 1).isEmpty()
+ assert roleHistory.listActiveNodes(role).empty
// ask for a container and expect to get the recently released one
AMRMClient.ContainerRequest request2 =
roleHistory.requestNode(roleStatus, resource);
- List<String> nodes2 = request2.getNodes()
+ List<String> nodes2 = request2.nodes
assert nodes2 != null
String hostname2 = nodes2[0]
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestFindNodesForNewInstances.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryFindNodesForNewInstances.groovy
similarity index 98%
rename from slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestFindNodesForNewInstances.groovy
rename to slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryFindNodesForNewInstances.groovy
index dab03f5..79cd348 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestFindNodesForNewInstances.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryFindNodesForNewInstances.groovy
@@ -38,7 +38,7 @@
*/
@Slf4j
@CompileStatic
-class TestFindNodesForNewInstances extends BaseMockAppStateTest {
+class TestRoleHistoryFindNodesForNewInstances extends BaseMockAppStateTest {
@Override
String getTestName() {
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestNIComparators.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryNIComparators.groovy
similarity index 97%
rename from slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestNIComparators.groovy
rename to slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryNIComparators.groovy
index 77119d5..612cce8 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestNIComparators.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryNIComparators.groovy
@@ -26,7 +26,7 @@
/**
* Unit test to verify the comparators sort as expected
*/
-class TestNIComparators extends BaseMockAppStateTest {
+class TestRoleHistoryNIComparators extends BaseMockAppStateTest {
NodeInstance age1Active4 = nodeInstance(1000, 4, 0, 0)
NodeInstance age2Active2 = nodeInstance(1001, 2, 0, 0)
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestOutstandingRequestTracker.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryOutstandingRequestTracker.groovy
similarity index 96%
rename from slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestOutstandingRequestTracker.groovy
rename to slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryOutstandingRequestTracker.groovy
index 8d1f4b0..7085678 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestOutstandingRequestTracker.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryOutstandingRequestTracker.groovy
@@ -24,7 +24,7 @@
import org.apache.slider.server.appmaster.state.OutstandingRequestTracker
import org.junit.Test
-class TestOutstandingRequestTracker extends BaseMockAppStateTest {
+class TestRoleHistoryOutstandingRequestTracker extends BaseMockAppStateTest {
NodeInstance host1 = new NodeInstance("host1", 3)
NodeInstance host2 = new NodeInstance("host2", 3)
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestHistoryRW.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryRW.groovy
similarity index 99%
rename from slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestHistoryRW.groovy
rename to slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryRW.groovy
index b646661..4242ba1 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestHistoryRW.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryRW.groovy
@@ -31,8 +31,8 @@
import org.junit.Test
@Slf4j
-@CompileStatic
-class TestHistoryRW extends BaseMockAppStateTest {
+//@CompileStatic
+class TestRoleHistoryRW extends BaseMockAppStateTest {
static long time = System.currentTimeMillis();
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestHistoryRWOrdering.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryRWOrdering.groovy
similarity index 98%
rename from slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestHistoryRWOrdering.groovy
rename to slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryRWOrdering.groovy
index 6ec046c..a0663e8 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestHistoryRWOrdering.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryRWOrdering.groovy
@@ -33,7 +33,7 @@
import java.util.regex.Pattern
@Slf4j
-class TestHistoryRWOrdering extends BaseMockAppStateTest {
+class TestRoleHistoryRWOrdering extends BaseMockAppStateTest {
def paths = pathlist(
[
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/BaseMockAppStateTest.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/BaseMockAppStateTest.groovy
index 628c729..f96a238 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/BaseMockAppStateTest.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/BaseMockAppStateTest.groovy
@@ -30,7 +30,9 @@
import org.apache.hadoop.yarn.conf.YarnConfiguration
import org.apache.slider.common.tools.SliderFileSystem
import org.apache.slider.common.tools.SliderUtils
+import org.apache.slider.core.conf.AggregateConf
import org.apache.slider.core.main.LauncherExitCodes
+import org.apache.slider.server.appmaster.operations.AbstractRMOperation
import org.apache.slider.server.appmaster.state.*
import org.apache.slider.test.SliderTestBase
import org.junit.Before
@@ -81,15 +83,31 @@
appState = new AppState(new MockRecordFactory())
appState.setContainerLimits(RM_MAX_RAM, RM_MAX_CORES)
appState.buildInstance(
- factory.newInstanceDefinition(0, 0, 0),
+ buildInstanceDefinition(),
+ new Configuration(),
new Configuration(false),
factory.ROLES,
fs,
historyPath,
- null, null)
+ null, null,
+ new SimpleReleaseSelector())
}
- abstract String getTestName();
+ /**
+ * Override point, define the instance definition
+ * @return
+ */
+ public AggregateConf buildInstanceDefinition() {
+ factory.newInstanceDefinition(0, 0, 0)
+ }
+
+ /**
+ * Get the test name ... defaults to method name
+ * @return
+ */
+ String getTestName() {
+ methodName.methodName;
+ }
public RoleStatus getRole0Status() {
return appState.lookupRoleStatus(ROLE0)
@@ -166,20 +184,72 @@
* @return a list of roles
*/
protected List<RoleInstance> createAndStartNodes() {
- List<RoleInstance> instances = createAndSubmitNodes()
+ return createStartAndStopNodes([])
+ }
+
+ /**
+ * Create, Start and stop nodes
+ * @param completionResults List filled in with the status on all completed nodes
+ * @return the nodes
+ */
+ public List<RoleInstance> createStartAndStopNodes(
+ List<AppState.NodeCompletionResult> completionResults) {
+ List<ContainerId> released = []
+ List<RoleInstance> instances = createAndSubmitNodes(released)
for (RoleInstance instance : instances) {
assert appState.onNodeManagerContainerStarted(instance.containerId)
}
+ releaseContainers(completionResults,
+ released,
+ ContainerState.COMPLETE,
+ "released",
+ 0
+ )
return instances
}
/**
+ * Release a list of containers, updating the completion results
+ * @param completionResults
+ * @param containerIds
+ * @param containerState
+ * @param exitText
+ * @param containerExitCode
+ * @return
+ */
+ public def releaseContainers(
+ List<AppState.NodeCompletionResult> completionResults,
+ List<ContainerId> containerIds,
+ ContainerState containerState,
+ String exitText,
+ int containerExitCode) {
+ containerIds.each { ContainerId id ->
+ ContainerStatus status = ContainerStatus.newInstance(id,
+ containerState,
+ exitText,
+ containerExitCode)
+ completionResults << appState.onCompletedNode(status)
+
+ }
+ }
+
+ /**
* Create nodes and submit them
* @return a list of roles
*/
public List<RoleInstance> createAndSubmitNodes() {
+ return createAndSubmitNodes([])
+ }
+
+ /**
+ * Create nodes and submit them
+ * @param released a list that is built up of all released nodes
+ * @return a list of roles allocated
+ */
+ public List<RoleInstance> createAndSubmitNodes(
+ List<ContainerId> released) {
List<AbstractRMOperation> ops = appState.reviewRequestAndReleaseNodes()
- List<Container> allocatedContainers = engine.execute(ops)
+ List<Container> allocatedContainers = engine.execute(ops, released)
List<ContainerAssignment> assignments = [];
List<AbstractRMOperation> operations = []
appState.onContainersAllocated(allocatedContainers, assignments, operations)
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockContainer.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockContainer.groovy
index 25bee36..3eba7c4 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockContainer.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockContainer.groovy
@@ -20,7 +20,7 @@
import org.apache.hadoop.yarn.api.records.*
-class MockContainer extends Container{
+class MockContainer extends Container {
ContainerId id;
NodeId nodeId
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockProviderService.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockProviderService.groovy
index 7b73451..6db1ac5 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockProviderService.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockProviderService.groovy
@@ -18,11 +18,15 @@
package org.apache.slider.server.appmaster.model.mock
+import java.io.IOException;
+
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.hadoop.service.LifecycleEvent
import org.apache.hadoop.service.ServiceStateChangeListener
import org.apache.hadoop.yarn.api.records.Container
+import org.apache.hadoop.yarn.api.records.ContainerId
+import org.apache.hadoop.yarn.client.api.AMRMClient
import org.apache.slider.api.ClusterDescription
import org.apache.slider.common.tools.SliderFileSystem
import org.apache.slider.core.conf.AggregateConf
@@ -33,7 +37,9 @@
import org.apache.slider.core.registry.info.ServiceInstanceData
import org.apache.slider.providers.ProviderRole
import org.apache.slider.providers.ProviderService
-import org.apache.slider.server.appmaster.AMViewForProviders
+import org.apache.slider.server.appmaster.actions.QueueAccess
+import org.apache.slider.server.appmaster.state.ContainerReleaseSelector
+import org.apache.slider.server.appmaster.state.MostRecentContainerReleaseSelector
import org.apache.slider.server.appmaster.state.StateAccessForProviders
import org.apache.slider.server.appmaster.web.rest.agent.AgentRestOperations
import org.apache.slider.server.appmaster.web.rest.agent.HeartBeat
@@ -63,7 +69,8 @@
}
@Override
- public void validateInstanceDefinition(AggregateConf instanceDefinition) throws SliderException {
+ public void validateInstanceDefinition(AggregateConf instanceDefinition)
+ throws SliderException {
}
@Override
@@ -99,7 +106,7 @@
return null
}
- @Override
+ @Override
public long getStartTime() {
return 0;
}
@@ -130,7 +137,7 @@
}
@Override
- public Map<String,String> getBlockers() {
+ public Map<String, String> getBlockers() {
return null;
}
@@ -155,11 +162,17 @@
}
@Override
- public Configuration loadProviderConfigurationInformation(File confDir) throws BadCommandArgumentsException, IOException {
+ public Configuration loadProviderConfigurationInformation(File confDir)
+ throws BadCommandArgumentsException, IOException {
return null;
}
@Override
+ void initializeApplicationConfiguration(AggregateConf instanceDefinition,
+ SliderFileSystem fileSystem) throws IOException, SliderException {
+ }
+
+ @Override
public void validateApplicationConfiguration(
AggregateConf instanceDefinition,
File confDir,
@@ -168,7 +181,7 @@
@Override
- public Map<String,String> buildProviderStatus() {
+ public Map<String, String> buildProviderStatus() {
return null;
}
@@ -187,7 +200,8 @@
}
@Override
- public Map<String, String> buildMonitorDetails(ClusterDescription clusterSpec) {
+ public Map<String, String> buildMonitorDetails(
+ ClusterDescription clusterSpec) {
return null;
}
@@ -195,28 +209,29 @@
void bind(
StateAccessForProviders stateAccessor,
RegistryViewForProviders registry,
- AMViewForProviders amView) {
+ QueueAccess queueAccess,
+ List<Container> liveContainers) {
}
@Override
- AgentRestOperations getAgentRestOperations() {
- return new AgentRestOperations() {
- @Override
- public RegistrationResponse handleRegistration(Register registration) {
- // dummy impl
- RegistrationResponse response = new RegistrationResponse();
- response.setResponseStatus(RegistrationStatus.OK);
- return response;
- }
+ AgentRestOperations getAgentRestOperations() {
+ return new AgentRestOperations() {
+ @Override
+ public RegistrationResponse handleRegistration(Register registration) {
+ // dummy impl
+ RegistrationResponse response = new RegistrationResponse();
+ response.setResponseStatus(RegistrationStatus.OK);
+ return response;
+ }
- @Override
- public HeartBeatResponse handleHeartBeat(HeartBeat heartBeat) {
- // dummy impl
- return new HeartBeatResponse();
- }
- }
+ @Override
+ public HeartBeatResponse handleHeartBeat(HeartBeat heartBeat) {
+ // dummy impl
+ return new HeartBeatResponse();
+ }
}
+ }
@Override
void buildEndpointDetails(Map<String, String> details) {
@@ -225,8 +240,34 @@
@Override
void applyInitialRegistryDefinitions(
- URL unsecureWebAPI, URL secureWebAPI, ServiceInstanceData registryInstanceData)
+ URL unsecureWebAPI,
+ URL secureWebAPI,
+ ServiceInstanceData registryInstanceData)
throws MalformedURLException, IOException {
}
+
+ @Override
+ public void notifyContainerCompleted(ContainerId containerId) {
+ }
+
+ @Override
+ ContainerReleaseSelector createContainerReleaseSelector() {
+ return new MostRecentContainerReleaseSelector()
+ }
+
+ @Override
+ public void releaseAssignedContainer(ContainerId containerId) {
+ // no-op
+ }
+
+ @Override
+ public void addContainerRequest(AMRMClient.ContainerRequest req) {
+ // no-op
+ }
+
+ @Override
+ void rebuildContainerDetails(List<Container> liveContainers, String applicationId,
+ Map<Integer, ProviderRole> roleProviderMap) {
+ }
}
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockRMOperationHandler.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockRMOperationHandler.groovy
index 10a7708..0fdba6b 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockRMOperationHandler.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockRMOperationHandler.groovy
@@ -21,25 +21,28 @@
import groovy.util.logging.Slf4j
import org.apache.hadoop.yarn.api.records.ContainerId
import org.apache.hadoop.yarn.client.api.AMRMClient
-import org.apache.slider.server.appmaster.state.AbstractRMOperation
-import org.apache.slider.server.appmaster.state.ContainerReleaseOperation
-import org.apache.slider.server.appmaster.state.ContainerRequestOperation
-import org.apache.slider.server.appmaster.state.RMOperationHandler
+import org.apache.slider.server.appmaster.operations.AbstractRMOperation
+import org.apache.slider.server.appmaster.operations.ContainerReleaseOperation
+import org.apache.slider.server.appmaster.operations.ContainerRequestOperation
+import org.apache.slider.server.appmaster.operations.RMOperationHandler
@Slf4j
class MockRMOperationHandler extends RMOperationHandler {
public List<AbstractRMOperation> operations = [];
-
+ int requests, releases;
+
@Override
public void releaseAssignedContainer(ContainerId containerId) {
operations.add(new ContainerReleaseOperation(containerId))
log.info("Releasing container ID " + containerId.getId())
+ releases++;
}
@Override
public void addContainerRequest(AMRMClient.ContainerRequest req) {
operations.add(new ContainerRequestOperation(req))
log.info("Requesting container role #" + req.priority);
+ requests++;
}
/**
@@ -47,5 +50,7 @@
*/
public void clear() {
operations.clear()
+ releases = 0;
+ requests = 0;
}
}
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockYarnEngine.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockYarnEngine.groovy
index 7ebdf52..f405188 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockYarnEngine.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockYarnEngine.groovy
@@ -25,9 +25,9 @@
import org.apache.hadoop.yarn.api.records.Container
import org.apache.hadoop.yarn.api.records.ContainerId
import org.apache.hadoop.yarn.client.api.AMRMClient
-import org.apache.slider.server.appmaster.state.AbstractRMOperation
-import org.apache.slider.server.appmaster.state.ContainerReleaseOperation
-import org.apache.slider.server.appmaster.state.ContainerRequestOperation
+import org.apache.slider.server.appmaster.operations.AbstractRMOperation
+import org.apache.slider.server.appmaster.operations.ContainerReleaseOperation
+import org.apache.slider.server.appmaster.operations.ContainerRequestOperation
/**
* This is an evolving engine to mock YARN operations
@@ -86,9 +86,7 @@
* @param ops
* @return
*/
- List<Container> execute(
- List<AbstractRMOperation> ops
- ) {
+ List<Container> execute(List<AbstractRMOperation> ops) {
return execute(ops, [])
}
@@ -106,7 +104,7 @@
if (op instanceof ContainerReleaseOperation) {
ContainerReleaseOperation cro = (ContainerReleaseOperation) op
ContainerId cid = cro.containerId
- releaseContainer(cid);
+ assert releaseContainer(cid);
released.add(cid)
} else {
ContainerRequestOperation req = (ContainerRequestOperation) op
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/monkey/TestMockMonkey.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/monkey/TestMockMonkey.groovy
new file mode 100644
index 0000000..c789011
--- /dev/null
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/monkey/TestMockMonkey.groovy
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.model.monkey
+
+import groovy.transform.CompileStatic
+import groovy.util.logging.Slf4j
+import org.apache.hadoop.yarn.conf.YarnConfiguration
+import org.apache.slider.server.appmaster.actions.ActionHalt
+import org.apache.slider.server.appmaster.actions.ActionKillContainer
+import org.apache.slider.server.appmaster.actions.QueueService
+import org.apache.slider.server.appmaster.model.mock.BaseMockAppStateTest
+import org.apache.slider.server.appmaster.model.mock.MockRMOperationHandler
+import org.apache.slider.server.appmaster.monkey.ChaosKillAM
+import org.apache.slider.server.appmaster.monkey.ChaosKillContainer
+import org.apache.slider.server.appmaster.monkey.ChaosMonkeyService
+import org.apache.slider.server.appmaster.monkey.ChaosTarget
+import org.apache.slider.server.appmaster.operations.ContainerReleaseOperation
+import org.apache.slider.server.appmaster.state.RoleInstance
+import org.junit.Before
+import org.junit.Test
+
+@CompileStatic
+@Slf4j
+class TestMockMonkey extends BaseMockAppStateTest {
+
+ /**
+ * This queue service is NOT started; tests need to poll the queue
+ * rather than expect them to execute
+ */
+ QueueService queues = new QueueService();
+ ChaosMonkeyService monkey = new ChaosMonkeyService(metricRegistry,
+ queues)
+
+ @Before
+ public void init() {
+ def configuration = new YarnConfiguration()
+ queues.init(configuration)
+ monkey.init(configuration)
+ }
+
+ @Test
+ public void testMonkeyStart() throws Throwable {
+ monkey.start()
+ monkey.stop()
+ }
+
+
+ @Test
+ public void testMonkeyPlay() throws Throwable {
+ ChaosCounter counter = new ChaosCounter()
+ monkey.addTarget("target", counter, ChaosMonkeyService.PERCENT_100)
+
+ monkey.play()
+ assert counter.count == 1
+ }
+
+ @Test
+ public void testMonkeyPlaySometimes() throws Throwable {
+ ChaosCounter counter = new ChaosCounter()
+ ChaosCounter counter2 = new ChaosCounter()
+ monkey.addTarget("target1", counter, ChaosMonkeyService.PERCENT_1 * 50)
+ monkey.addTarget("target2", counter2, ChaosMonkeyService.PERCENT_1 * 25)
+
+ for (int i = 0; i < 100; i++) {
+ monkey.play()
+ }
+ log.info("Counter1 = ${counter.count} counter2 = ${counter2.count}")
+ /*
+ * Relying on probability here to give approximate answers
+ */
+ assert counter.count > 25
+ assert counter.count < 75
+ assert counter2.count < counter.count
+ }
+
+ @Test
+ public void testAMKiller() throws Throwable {
+
+ def chaos = new ChaosKillAM(queues, -1)
+ chaos.chaosAction();
+ assert queues.scheduledActions.size() == 1
+ def action = queues.scheduledActions.take()
+ assert action instanceof ActionHalt
+ }
+
+
+ @Test
+ public void testContainerKillerEmptyApp() throws Throwable {
+
+
+ def chaos = new ChaosKillContainer(appState,
+ queues,
+ new MockRMOperationHandler())
+ chaos.chaosAction();
+ assert queues.scheduledActions.size() == 0
+ }
+
+
+
+ @Test
+ public void testContainerKiller() throws Throwable {
+ MockRMOperationHandler ops = new MockRMOperationHandler();
+ role0Status.desired = 1
+ List<RoleInstance> instances = createAndStartNodes()
+ assert instances.size() == 1
+ def instance = instances[0]
+
+ def chaos = new ChaosKillContainer(appState, queues, ops)
+ chaos.chaosAction();
+ assert queues.scheduledActions.size() == 1
+ def action = queues.scheduledActions.take()
+ ActionKillContainer killer = (ActionKillContainer) action
+ assert killer.containerId == instance.containerId;
+ killer.execute(null, queues, appState)
+ assert ops.releases == 1;
+
+ ContainerReleaseOperation operation = (ContainerReleaseOperation) ops.operations[0]
+ assert operation.containerId == instance.containerId
+ }
+
+
+
+ /**
+ * Chaos target that just implement a counter
+ */
+ private static class ChaosCounter implements ChaosTarget {
+ int count;
+
+ @Override
+ void chaosAction() {
+ count++;
+ }
+
+
+ @Override
+ public String toString() {
+ final StringBuilder sb = new StringBuilder(
+ "ChaosCounter{");
+ sb.append("count=").append(count);
+ sb.append('}');
+ return sb.toString();
+ }
+ }
+}
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/web/rest/publisher/TestPublisherRestResources.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/web/rest/publisher/TestPublisherRestResources.groovy
index cd7c9d8..c1732e6 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/web/rest/publisher/TestPublisherRestResources.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/web/rest/publisher/TestPublisherRestResources.groovy
@@ -47,9 +47,8 @@
@Test
public void testRestURIs() throws Throwable {
- def clustername = "test_publisherws"
- createMiniCluster(
- clustername,
+ String clustername = createMiniCluster(
+ "",
configuration,
1,
1,
diff --git a/slider-core/src/test/groovy/org/apache/slider/test/SliderTestBase.groovy b/slider-core/src/test/groovy/org/apache/slider/test/SliderTestBase.groovy
index d632b25..28b484f 100644
--- a/slider-core/src/test/groovy/org/apache/slider/test/SliderTestBase.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/test/SliderTestBase.groovy
@@ -18,8 +18,16 @@
package org.apache.slider.test
+import com.codahale.metrics.MetricRegistry
import groovy.transform.CompileStatic
+import org.apache.hadoop.fs.FileUtil
+import org.apache.slider.common.SliderXMLConfKeysForTesting
import org.junit.Before
+import org.junit.BeforeClass
+import org.junit.Rule
+import org.junit.rules.TestName
+
+import java.nio.file.Files
/**
* Base class for unit tests as well as ones starting mini clusters
@@ -31,10 +39,25 @@
@CompileStatic
public abstract class SliderTestBase extends SliderTestUtils {
+ /**
+ * Singleton metric registry
+ */
+ public static final MetricRegistry metricRegistry = new MetricRegistry()
+
+ @Rule
+ public TestName methodName = new TestName();
+
+ @BeforeClass
+ public static void nameThread() {
+ Thread.currentThread().setName("JUnit");
+ }
+
@Before
public void setup() {
- //give our thread a name
- Thread.currentThread().name = "JUnit"
+ FileUtil.fullyDelete(new File(SliderXMLConfKeysForTesting.TEST_SECURITY_DIR))
}
+
+
+
}
diff --git a/slider-core/src/test/groovy/org/apache/slider/test/SliderTestUtils.groovy b/slider-core/src/test/groovy/org/apache/slider/test/SliderTestUtils.groovy
index a250e55..3fc3e55 100644
--- a/slider-core/src/test/groovy/org/apache/slider/test/SliderTestUtils.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/test/SliderTestUtils.groovy
@@ -205,7 +205,7 @@
public static void waitUntilClusterLive(SliderClient client, int timeout) {
Duration duration = new Duration(timeout);
duration.start()
- while (!client.actionExists(client.deployedClusterName, true) &&
+ while (0 != client.actionExists(client.deployedClusterName, true) &&
!duration.limitExceeded) {
sleep(1000);
}
@@ -480,6 +480,8 @@
List args) {
ServiceLauncher<SliderClient> serviceLauncher =
new ServiceLauncher<SliderClient>(SliderClient.name);
+
+ log.debug("slider ${SliderUtils.join(args, " ", false)}")
serviceLauncher.launchService(conf,
toArray(args),
false);
@@ -492,6 +494,8 @@
Throwable {
ServiceLauncher serviceLauncher =
new ServiceLauncher(serviceClass.name);
+ log.debug("slider ${SliderUtils.join(args, " ", false)}")
+
serviceLauncher.launchService(conf,
toArray(args),
false);
diff --git a/slider-core/src/test/groovy/org/apache/slider/test/YarnMiniClusterTestBase.groovy b/slider-core/src/test/groovy/org/apache/slider/test/YarnMiniClusterTestBase.groovy
index 2c1b270..b6f863b 100644
--- a/slider-core/src/test/groovy/org/apache/slider/test/YarnMiniClusterTestBase.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/test/YarnMiniClusterTestBase.groovy
@@ -136,13 +136,8 @@
}
}
-
- @Rule
- public TestName methodName = new TestName();
-
- @Before
- public void nameThread() {
- Thread.currentThread().setName("JUnit");
+ protected String buildClustername(String clustername) {
+ return clustername ?: createClusterName()
}
/**
@@ -152,7 +147,7 @@
*/
protected String createClusterName() {
def base = methodName.getMethodName().toLowerCase(Locale.ENGLISH)
- if (clusterCount++>1) {
+ if (clusterCount++ > 1) {
base += "-$clusterCount"
}
return base
@@ -162,7 +157,7 @@
@Override
void setup() {
super.setup()
- def testConf = getTestConfiguration();
+ def testConf = testConfiguration;
thawWaitTime = getTimeOptionMillis(testConf,
KEY_TEST_THAW_WAIT_TIME,
thawWaitTime)
@@ -230,15 +225,16 @@
/**
* Create and start a minicluster
- * @param name cluster/test name
+ * @param name cluster/test name; if empty one is created from the junit method
* @param conf configuration to use
* @param noOfNodeManagers #of NMs
* @param numLocalDirs #of local dirs
* @param numLogDirs #of log dirs
* @param startZK create a ZK micro cluster
* @param startHDFS create an HDFS mini cluster
+ * @return the name of the cluster
*/
- protected void createMiniCluster(String name,
+ protected String createMiniCluster(String name,
YarnConfiguration conf,
int noOfNodeManagers,
int numLocalDirs,
@@ -247,12 +243,14 @@
conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 64);
conf.set(YarnConfiguration.RM_SCHEDULER, FIFO_SCHEDULER);
SliderUtils.patchConfiguration(conf)
+ name = buildClustername(name)
miniCluster = new MiniYARNCluster(name, noOfNodeManagers, numLocalDirs, numLogDirs)
miniCluster.init(conf)
miniCluster.start();
if (startHDFS) {
createMiniHDFSCluster(name, conf)
}
+ return name
}
/**
@@ -441,12 +439,16 @@
* @param clusterOps map of key=value cluster options to set with the --option arg
* @return launcher which will have executed the command.
*/
- public ServiceLauncher<SliderClient> createOrBuildCluster(String action, String clustername, Map<String, Integer> roles, List<String> extraArgs, boolean deleteExistingData, boolean blockUntilRunning, Map<String, String> clusterOps) {
+ public ServiceLauncher<SliderClient> createOrBuildCluster(String action, String clustername,
+ Map<String, Integer> roles, List<String> extraArgs, boolean deleteExistingData,
+ boolean blockUntilRunning, Map<String, String> clusterOps) {
assert clustername != null
assert miniCluster != null
- if (deleteExistingData) {
- HadoopFS dfs = HadoopFS.get(new URI(fsDefaultName), miniCluster.config)
- Path clusterDir = new SliderFileSystem(dfs, miniCluster.config).buildClusterDirPath(clustername)
+ // update action should keep existing data
+ def config = miniCluster.config
+ if (deleteExistingData && !SliderActions.ACTION_UPDATE.equals(action)) {
+ HadoopFS dfs = HadoopFS.get(new URI(fsDefaultName), config)
+ Path clusterDir = new SliderFileSystem(dfs, config).buildClusterDirPath(clustername)
log.info("deleting customer data at $clusterDir")
//this is a safety check to stop us doing something stupid like deleting /
assert clusterDir.toString().contains("/.slider/")
@@ -485,7 +487,7 @@
}
ServiceLauncher<SliderClient> launcher = launchClientAgainstMiniMR(
//config includes RM binding info
- new YarnConfiguration(miniCluster.config),
+ new YarnConfiguration(config),
//varargs list of command line params
argsList
)
diff --git a/slider-core/src/test/groovy/org/apache/slider/test/YarnZKMiniClusterTestBase.groovy b/slider-core/src/test/groovy/org/apache/slider/test/YarnZKMiniClusterTestBase.groovy
index 77d47f4..0259fb7 100644
--- a/slider-core/src/test/groovy/org/apache/slider/test/YarnZKMiniClusterTestBase.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/test/YarnZKMiniClusterTestBase.groovy
@@ -95,19 +95,20 @@
* @param startZK create a ZK micro cluster
* @param startHDFS create an HDFS mini cluster
*/
- protected void createMiniCluster(String name,
+ protected String createMiniCluster(String name,
YarnConfiguration conf,
int noOfNodeManagers,
int numLocalDirs,
int numLogDirs,
boolean startZK,
boolean startHDFS) {
- createMiniCluster(name, conf, noOfNodeManagers, numLocalDirs, numLogDirs,
+ name = createMiniCluster(name, conf, noOfNodeManagers, numLocalDirs, numLogDirs,
startHDFS)
if (startZK) {
createMicroZKCluster(conf)
}
+ return name
}
/**
@@ -117,11 +118,24 @@
* @param noOfNodeManagers #of NMs
* @param startZK create a ZK micro cluster
*/
- protected void createMiniCluster(String name,
+ protected String createMiniCluster(String name,
YarnConfiguration conf,
int noOfNodeManagers,
boolean startZK) {
- createMiniCluster(name, conf, noOfNodeManagers, 1, 1, startZK, false)
+ return createMiniCluster(name, conf, noOfNodeManagers, 1, 1, startZK, false)
+ }
+
+ /**
+ * Create and start a minicluster with the name from the test method
+ * @param name cluster/test name
+ * @param conf configuration to use
+ * @param noOfNodeManagers #of NMs
+ * @param startZK create a ZK micro cluster
+ */
+ protected String createMiniCluster(YarnConfiguration conf,
+ int noOfNodeManagers,
+ boolean startZK) {
+ return createMiniCluster("", conf, noOfNodeManagers, 1, 1, startZK, false)
}
public void createMicroZKCluster(Configuration conf) {
diff --git a/slider-core/src/test/java/org/apache/slider/providers/agent/TestAgentProviderService.java b/slider-core/src/test/java/org/apache/slider/providers/agent/TestAgentProviderService.java
index c10b60a..6ed950f 100644
--- a/slider-core/src/test/java/org/apache/slider/providers/agent/TestAgentProviderService.java
+++ b/slider-core/src/test/java/org/apache/slider/providers/agent/TestAgentProviderService.java
@@ -18,24 +18,21 @@
package org.apache.slider.providers.agent;
+import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FilterFileSystem;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.LocalResourceType;
-import org.apache.hadoop.yarn.api.records.NodeId;
-import org.apache.hadoop.yarn.api.records.Priority;
-import org.apache.hadoop.yarn.api.records.Resource;
-import org.apache.hadoop.yarn.api.records.Token;
import org.apache.slider.api.ClusterDescription;
import org.apache.slider.api.ClusterDescriptionKeys;
import org.apache.slider.api.ClusterNode;
+import org.apache.slider.api.InternalKeys;
import org.apache.slider.api.OptionKeys;
-import org.apache.slider.api.StatusKeys;
+import org.apache.slider.common.SliderXmlConfKeys;
import org.apache.slider.common.tools.SliderFileSystem;
import org.apache.slider.core.conf.AggregateConf;
import org.apache.slider.core.conf.ConfTree;
@@ -46,6 +43,7 @@
import org.apache.slider.providers.agent.application.metadata.Application;
import org.apache.slider.providers.agent.application.metadata.CommandOrder;
import org.apache.slider.providers.agent.application.metadata.Component;
+import org.apache.slider.providers.agent.application.metadata.ComponentExport;
import org.apache.slider.providers.agent.application.metadata.Export;
import org.apache.slider.providers.agent.application.metadata.ExportGroup;
import org.apache.slider.providers.agent.application.metadata.Metainfo;
@@ -57,6 +55,7 @@
import org.apache.slider.server.appmaster.state.StateAccessForProviders;
import org.apache.slider.server.appmaster.web.rest.agent.CommandReport;
import org.apache.slider.server.appmaster.web.rest.agent.ComponentStatus;
+import org.apache.slider.server.appmaster.web.rest.agent.ExecutionCommand;
import org.apache.slider.server.appmaster.web.rest.agent.HeartBeat;
import org.apache.slider.server.appmaster.web.rest.agent.HeartBeatResponse;
import org.apache.slider.server.appmaster.web.rest.agent.Register;
@@ -65,6 +64,7 @@
import org.junit.Assert;
import org.junit.Test;
import org.mockito.ArgumentCaptor;
+import org.mockito.Matchers;
import org.mockito.Mockito;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -80,12 +80,15 @@
import java.util.Map;
import java.util.Set;
+import static org.easymock.EasyMock.anyBoolean;
import static org.easymock.EasyMock.anyObject;
import static org.easymock.EasyMock.createNiceMock;
import static org.easymock.EasyMock.expect;
import static org.easymock.EasyMock.replay;
+import static org.junit.Assert.assertEquals;
import static org.mockito.Matchers.any;
import static org.mockito.Matchers.anyCollection;
+import static org.mockito.Matchers.anyMap;
import static org.mockito.Matchers.anyString;
import static org.mockito.Mockito.doNothing;
import static org.mockito.Mockito.doReturn;
@@ -105,6 +108,7 @@
+ " <version>0.96.0.2.1.1</version>\n"
+ " <type>YARN-APP</type>\n"
+ " <minHadoopVersion>2.1.0</minHadoopVersion>\n"
+ + " <exportedConfigs>hbase-site,global</exportedConfigs>\n"
+ " <exportGroups>\n"
+ " <exportGroup>\n"
+ " <name>QuickLinks</name>\n"
@@ -132,8 +136,20 @@
+ " </commandOrders>\n"
+ " <components>\n"
+ " <component>\n"
+ + " <name>HBASE_REST</name>\n"
+ + " <category>MASTER</category>\n"
+ + " <commandScript>\n"
+ + " <script>scripts/hbase_rest.py</script>\n"
+ + " <scriptType>PYTHON</scriptType>\n"
+ + " <timeout>600</timeout>\n"
+ + " </commandScript>\n"
+ + " </component>\n"
+ + " <component>\n"
+ " <name>HBASE_MASTER</name>\n"
+ " <category>MASTER</category>\n"
+ + " <publishConfig>true</publishConfig>\n"
+ + " <autoStartOnFailure>true</autoStartOnFailure>\n"
+ + " <appExports>QuickLinks-JMX_Endpoint,QuickLinks-Master_Status</appExports>\n"
+ " <minInstanceCount>1</minInstanceCount>\n"
+ " <maxInstanceCount>2</maxInstanceCount>\n"
+ " <commandScript>\n"
@@ -146,10 +162,21 @@
+ " <name>HBASE_REGIONSERVER</name>\n"
+ " <category>SLAVE</category>\n"
+ " <minInstanceCount>1</minInstanceCount>\n"
+ + " <autoStartOnFailure>Falsee</autoStartOnFailure>\n"
+ " <commandScript>\n"
+ " <script>scripts/hbase_regionserver.py</script>\n"
+ " <scriptType>PYTHON</scriptType>\n"
+ " </commandScript>\n"
+ + " <componentExports>\n"
+ + " <componentExport>\n"
+ + " <name>PropertyA</name>\n"
+ + " <value>${THIS_HOST}:${site.global.listen_port}</value>\n"
+ + " </componentExport>\n"
+ + " <componentExport>\n"
+ + " <name>PropertyB</name>\n"
+ + " <value>AConstant</value>\n"
+ + " </componentExport>\n"
+ + " </componentExports>\n"
+ " </component>\n"
+ " </components>\n"
+ " <osSpecifics>\n"
@@ -205,7 +232,7 @@
public void testRegistration() throws IOException {
ConfTree tree = new ConfTree();
- tree.global.put(OptionKeys.INTERNAL_APPLICATION_IMAGE_PATH, ".");
+ tree.global.put(InternalKeys.INTERNAL_APPLICATION_IMAGE_PATH, ".");
AgentProviderService aps = new AgentProviderService();
ContainerLaunchContext ctx = createNiceMock(ContainerLaunchContext.class);
@@ -242,6 +269,10 @@
metainfo.setApplication(new Application());
doReturn(metainfo).when(mockAps).getApplicationMetainfo(any(SliderFileSystem.class), anyString());
+ Configuration conf = new Configuration();
+ conf.set(SliderXmlConfKeys.REGISTRY_PATH,
+ SliderXmlConfKeys.DEFAULT_REGISTRY_PATH);
+
try {
doReturn(true).when(mockAps).isMaster(anyString());
doNothing().when(mockAps).addInstallCommand(
@@ -249,21 +280,28 @@
eq("mockcontainer_1"),
any(HeartBeatResponse.class),
eq("scripts/hbase_master.py"));
+ doReturn(conf).when(mockAps).getConfig();
} catch (SliderException e) {
}
+ doNothing().when(mockAps).processAllocatedPorts(
+ anyString(),
+ anyString(),
+ anyString(),
+ anyMap()
+ );
expect(access.isApplicationLive()).andReturn(true).anyTimes();
ClusterDescription desc = new ClusterDescription();
- desc.setInfo(StatusKeys.INFO_AM_HOSTNAME, "host1");
- desc.setInfo(StatusKeys.INFO_AM_AGENT_PORT, "8088");
- desc.setInfo(StatusKeys.INFO_AM_SECURED_AGENT_PORT, "8089");
+ desc.setOption(OptionKeys.ZOOKEEPER_QUORUM, "host1:2181");
desc.setInfo(OptionKeys.APPLICATION_NAME, "HBASE");
expect(access.getClusterStatus()).andReturn(desc).anyTimes();
AggregateConf aggConf = new AggregateConf();
ConfTreeOperations treeOps = aggConf.getAppConfOperations();
treeOps.getOrAddComponent("HBASE_MASTER").put(AgentKeys.WAIT_HEARTBEAT, "0");
+ treeOps.set(OptionKeys.APPLICATION_NAME, "HBASE");
expect(access.getInstanceDefinitionSnapshot()).andReturn(aggConf);
+ expect(access.getInternalsSnapshot()).andReturn(treeOps).anyTimes();
replay(access, ctx, container, sliderFileSystem);
try {
@@ -276,17 +314,30 @@
resourceComponent,
appComponent,
containerTmpDirPath);
- } catch (SliderException | IOException he) {
+ // JDK7
+ } catch (IOException he) {
+ log.warn("{}", he, he);
+ } catch (SliderException he) {
log.warn("{}", he, he);
}
Register reg = new Register();
reg.setResponseId(0);
reg.setHostname("mockcontainer_1___HBASE_MASTER");
+ Map<String,String> ports = new HashMap();
+ ports.put("a","100");
+ reg.setAllocatedPorts(ports);
RegistrationResponse resp = mockAps.handleRegistration(reg);
Assert.assertEquals(0, resp.getResponseId());
Assert.assertEquals(RegistrationStatus.OK, resp.getResponseStatus());
+ Mockito.verify(mockAps, Mockito.times(1)).processAllocatedPorts(
+ anyString(),
+ anyString(),
+ anyString(),
+ anyMap()
+ );
+
HeartBeat hb = new HeartBeat();
hb.setResponseId(1);
hb.setHostname("mockcontainer_1___HBASE_MASTER");
@@ -302,16 +353,16 @@
public ClusterDescription getClusterStatus() {
ClusterDescription cd = new ClusterDescription();
cd.status = new HashMap<String, Object>();
- Map<String, Map<String, ClusterNode>> roleMap = new HashMap<>();
- ClusterNode cn1 = new ClusterNode(new MyContainerId(1));
+ Map<String, Map<String, ClusterNode>> roleMap = new HashMap<String, Map<String, ClusterNode>>();
+ ClusterNode cn1 = new ClusterNode(new MockContainerId(1));
cn1.host = "FIRST_HOST";
- Map<String, ClusterNode> map1 = new HashMap<>();
+ Map<String, ClusterNode> map1 = new HashMap<String, ClusterNode>();
map1.put("FIRST_CONTAINER", cn1);
- ClusterNode cn2 = new ClusterNode(new MyContainerId(2));
+ ClusterNode cn2 = new ClusterNode(new MockContainerId(2));
cn2.host = "SECOND_HOST";
- Map<String, ClusterNode> map2 = new HashMap<>();
+ Map<String, ClusterNode> map2 = new HashMap<String, ClusterNode>();
map2.put("SECOND_CONTAINER", cn2);
- ClusterNode cn3 = new ClusterNode(new MyContainerId(3));
+ ClusterNode cn3 = new ClusterNode(new MockContainerId(3));
cn3.host = "THIRD_HOST";
map2.put("THIRD_CONTAINER", cn3);
@@ -335,7 +386,7 @@
};
aps.setAmState(appState);
- Map<String, String> tokens = new HashMap<>();
+ Map<String, String> tokens = new HashMap<String, String>();
aps.addRoleRelatedTokens(tokens);
Assert.assertEquals(2, tokens.size());
Assert.assertEquals("FIRST_HOST", tokens.get("${FIRST_ROLE_HOST}"));
@@ -344,6 +395,45 @@
}
@Test
+ public void testComponentSpecificPublishes() throws Exception {
+ InputStream metainfo_1 = new ByteArrayInputStream(metainfo_1_str.getBytes());
+ Metainfo metainfo = new MetainfoParser().parse(metainfo_1);
+ AgentProviderService aps = new AgentProviderService();
+ AgentProviderService mockAps = Mockito.spy(aps);
+ doNothing().when(mockAps).publishApplicationInstanceData(anyString(), anyString(), anyCollection());
+ doReturn(metainfo).when(mockAps).getMetainfo();
+
+ Map<String, String> ports = new HashMap<String, String>();
+ ports.put("global.listen_port", "10010");
+ mockAps.processAndPublishComponentSpecificData(ports,
+ "cid1",
+ "host1",
+ "HBASE_REGIONSERVER");
+ ArgumentCaptor<Collection> entriesCaptor = ArgumentCaptor.
+ forClass(Collection.class);
+ ArgumentCaptor<String> publishNameCaptor = ArgumentCaptor.
+ forClass(String.class);
+ Mockito.verify(mockAps, Mockito.times(1)).publishApplicationInstanceData(
+ anyString(),
+ publishNameCaptor.capture(),
+ entriesCaptor.capture());
+ assert entriesCaptor.getAllValues().size() == 1;
+ for (Collection coll : entriesCaptor.getAllValues()) {
+ Set<Map.Entry<String, String>> entrySet = (Set<Map.Entry<String, String>>) coll;
+ for (Map.Entry entry : entrySet) {
+ log.info("{}:{}", entry.getKey(), entry.getValue().toString());
+ if (entry.getKey().equals("PropertyA")) {
+ assert entry.getValue().toString().equals("host1:10010");
+ }
+ }
+ }
+ assert publishNameCaptor.getAllValues().size() == 1;
+ for (String coll : publishNameCaptor.getAllValues()) {
+ assert coll.equals("ComponentInstanceData");
+ }
+ }
+
+ @Test
public void testProcessConfig() throws Exception {
InputStream metainfo_1 = new ByteArrayInputStream(metainfo_1_str.getBytes());
Metainfo metainfo = new MetainfoParser().parse(metainfo_1);
@@ -354,38 +444,40 @@
status.setClusterName("test");
status.setComponentName("HBASE_MASTER");
status.setRoleCommand("GET_CONFIG");
- Map<String, String> hbaseSite = new HashMap<>();
+ Map<String, String> hbaseSite = new HashMap<String, String>();
hbaseSite.put("hbase.master.info.port", "60012");
hbaseSite.put("c", "d");
- Map<String, Map<String, String>> configs = new HashMap<>();
+ Map<String, Map<String, String>> configs =
+ new HashMap<String, Map<String, String>>();
configs.put("hbase-site", hbaseSite);
configs.put("global", hbaseSite);
status.setConfigs(configs);
- hb.setComponentStatus(new ArrayList<>(Arrays.asList(status)));
+ hb.setComponentStatus(new ArrayList<ComponentStatus>(Arrays.asList(status)));
- Map<String, Map<String, ClusterNode>> roleClusterNodeMap = new HashMap<>();
- Map<String, ClusterNode> container = new HashMap<>();
- ClusterNode cn1 = new ClusterNode(new MyContainerId(1));
+ Map<String, Map<String, ClusterNode>> roleClusterNodeMap = new HashMap<String, Map<String, ClusterNode>>();
+ Map<String, ClusterNode> container = new HashMap<String, ClusterNode>();
+ ClusterNode cn1 = new ClusterNode(new MockContainerId(1));
cn1.host = "HOST1";
container.put("cid1", cn1);
roleClusterNodeMap.put("HBASE_MASTER", container);
- ComponentInstanceState componentStatus = new ComponentInstanceState("HBASE_MASTER", "aid", "cid");
+ ComponentInstanceState componentStatus = new ComponentInstanceState("HBASE_MASTER",
+ new MockContainerId(1), "cid");
AgentProviderService mockAps = Mockito.spy(aps);
- doNothing().when(mockAps).publishComponentConfiguration(anyString(), anyString(), anyCollection());
+ doNothing().when(mockAps).publishApplicationInstanceData(anyString(), anyString(), anyCollection());
doReturn(metainfo).when(mockAps).getMetainfo();
doReturn(roleClusterNodeMap).when(mockAps).getRoleClusterNodeMapping();
- mockAps.processReturnedStatus(hb, componentStatus);
- assert componentStatus.getConfigReported() == true;
- ArgumentCaptor<Collection> commandCaptor = ArgumentCaptor.
+ mockAps.publishConfigAndExportGroups(hb, componentStatus, "HBASE_MASTER");
+ Assert.assertTrue(componentStatus.getConfigReported());
+ ArgumentCaptor<Collection> entriesCaptor = ArgumentCaptor.
forClass(Collection.class);
- Mockito.verify(mockAps, Mockito.times(3)).publishComponentConfiguration(
+ Mockito.verify(mockAps, Mockito.times(3)).publishApplicationInstanceData(
anyString(),
anyString(),
- commandCaptor.capture());
- assert commandCaptor.getAllValues().size() == 3;
- for (Collection coll : commandCaptor.getAllValues()) {
+ entriesCaptor.capture());
+ Assert.assertEquals(3, entriesCaptor.getAllValues().size());
+ for (Collection coll : entriesCaptor.getAllValues()) {
Set<Map.Entry<String, String>> entrySet = (Set<Map.Entry<String, String>>) coll;
for (Map.Entry entry : entrySet) {
log.info("{}:{}", entry.getKey(), entry.getValue().toString());
@@ -394,6 +486,16 @@
}
}
}
+
+ Map<String, String> exports = mockAps.getCurrentExports("QuickLinks");
+ Assert.assertEquals(2, exports.size());
+ Assert.assertEquals(exports.get("JMX_Endpoint"), "http://HOST1:60012/jmx");
+
+ mockAps.publishConfigAndExportGroups(hb, componentStatus, "HBASE_REST");
+ Mockito.verify(mockAps, Mockito.times(3)).publishApplicationInstanceData(
+ anyString(),
+ anyString(),
+ entriesCaptor.capture());
}
@Test
@@ -404,32 +506,47 @@
Application application = metainfo.getApplication();
log.info("Service: " + application.toString());
Assert.assertEquals(application.getName(), "HBASE");
- Assert.assertEquals(application.getComponents().size(), 2);
+ Assert.assertEquals(application.getExportedConfigs(), "hbase-site,global");
+ Assert.assertEquals(application.getComponents().size(), 3);
List<Component> components = application.getComponents();
int found = 0;
for (Component component : components) {
if (component.getName().equals("HBASE_MASTER")) {
+ Assert.assertEquals(component.getAutoStartOnFailure(), "true");
+ Assert.assertEquals(component.getRequiresAutoRestart(), Boolean.TRUE);
Assert.assertEquals(component.getMinInstanceCount(), "1");
Assert.assertEquals(component.getMaxInstanceCount(), "2");
Assert.assertEquals(component.getCommandScript().getScript(), "scripts/hbase_master.py");
Assert.assertEquals(component.getCategory(), "MASTER");
+ Assert.assertEquals(component.getComponentExports().size(), 0);
+ Assert.assertEquals(component.getAppExports(), "QuickLinks-JMX_Endpoint,QuickLinks-Master_Status");
found++;
}
if (component.getName().equals("HBASE_REGIONSERVER")) {
+ Assert.assertEquals(component.getAutoStartOnFailure(), "Falsee");
+ Assert.assertEquals(component.getRequiresAutoRestart(), Boolean.FALSE);
Assert.assertEquals(component.getMinInstanceCount(), "1");
Assert.assertNull(component.getMaxInstanceCount());
Assert.assertEquals(component.getCommandScript().getScript(), "scripts/hbase_regionserver.py");
Assert.assertEquals(component.getCategory(), "SLAVE");
+ Assert.assertEquals(component.getComponentExports().size(), 2);
+ List<ComponentExport> es = component.getComponentExports();
+ ComponentExport e = es.get(0);
+ Assert.assertEquals(e.getName(), "PropertyA");
+ Assert.assertEquals(e.getValue(), "${THIS_HOST}:${site.global.listen_port}");
+ e = es.get(1);
+ Assert.assertEquals(e.getName(), "PropertyB");
+ Assert.assertEquals(e.getValue(), "AConstant");
found++;
}
}
Assert.assertEquals(found, 2);
- assert application.getExportGroups().size() == 1;
+ Assert.assertEquals(application.getExportGroups().size(), 1);
List<ExportGroup> egs = application.getExportGroups();
ExportGroup eg = egs.get(0);
- assert eg.getName().equals("QuickLinks");
- assert eg.getExports().size() == 2;
+ Assert.assertEquals(eg.getName(), "QuickLinks");
+ Assert.assertEquals(eg.getExports().size(), 2);
found = 0;
for (Export export : eg.getExports()) {
@@ -490,18 +607,18 @@
String role_hm = "HBASE_MASTER";
String role_hrs = "HBASE_REGIONSERVER";
- AgentProviderService aps = new AgentProviderService();
- AgentProviderService mockAps = Mockito.spy(aps);
+ AgentProviderService aps1 = new AgentProviderService();
+ AgentProviderService mockAps = Mockito.spy(aps1);
doReturn(metainfo).when(mockAps).getMetainfo();
- AgentProviderService mockAps2 = Mockito.spy(aps);
+ AgentProviderService mockAps2 = Mockito.spy(aps1);
doReturn(metainfo2).when(mockAps2).getMetainfo();
Assert.assertTrue(mockAps.isMaster(role_hm));
Assert.assertFalse(mockAps.isMaster(role_hrs));
- Assert.assertFalse(mockAps.canPublishConfig(role_hm));
+ Assert.assertTrue(mockAps.canPublishConfig(role_hm));
Assert.assertFalse(mockAps.canPublishConfig(role_hrs));
- Assert.assertFalse(mockAps.canAnyMasterPublishConfig());
+ Assert.assertTrue(mockAps.canAnyMasterPublishConfig());
Assert.assertTrue(mockAps2.isMaster(role_hm));
Assert.assertFalse(mockAps2.isMaster(role_hrs));
@@ -511,13 +628,13 @@
}
@Test
- public void testOrchastratedAppStart() throws IOException {
+ public void testOrchestratedAppStart() throws IOException {
// App has two components HBASE_MASTER and HBASE_REGIONSERVER
// Start of HBASE_RS depends on the start of HBASE_MASTER
InputStream metainfo_1 = new ByteArrayInputStream(metainfo_1_str.getBytes());
Metainfo metainfo = new MetainfoParser().parse(metainfo_1);
ConfTree tree = new ConfTree();
- tree.global.put(OptionKeys.INTERNAL_APPLICATION_IMAGE_PATH, ".");
+ tree.global.put(InternalKeys.INTERNAL_APPLICATION_IMAGE_PATH, ".");
AgentProviderService aps = new AgentProviderService();
ContainerLaunchContext ctx = createNiceMock(ContainerLaunchContext.class);
@@ -552,6 +669,10 @@
doReturn(access).when(mockAps).getAmState();
doReturn(metainfo).when(mockAps).getApplicationMetainfo(any(SliderFileSystem.class), anyString());
+ Configuration conf = new Configuration();
+ conf.set(SliderXmlConfKeys.REGISTRY_PATH,
+ SliderXmlConfKeys.DEFAULT_REGISTRY_PATH);
+
try {
doReturn(true).when(mockAps).isMaster(anyString());
doNothing().when(mockAps).addInstallCommand(
@@ -563,24 +684,23 @@
anyString(),
anyString(),
any(HeartBeatResponse.class),
- anyString());
+ anyString(),
+ Matchers.anyBoolean());
doNothing().when(mockAps).addGetConfigCommand(
anyString(),
anyString(),
any(HeartBeatResponse.class));
- doNothing().when(mockAps).publishComponentConfiguration(
+ doNothing().when(mockAps).publishApplicationInstanceData(
anyString(),
anyString(),
anyCollection());
-
+ doReturn(conf).when(mockAps).getConfig();
} catch (SliderException e) {
}
expect(access.isApplicationLive()).andReturn(true).anyTimes();
ClusterDescription desc = new ClusterDescription();
- desc.setInfo(StatusKeys.INFO_AM_HOSTNAME, "host1");
- desc.setInfo(StatusKeys.INFO_AM_AGENT_PORT, "8088");
- desc.setInfo(StatusKeys.INFO_AM_SECURED_AGENT_PORT, "8089");
+ desc.setOption(OptionKeys.ZOOKEEPER_QUORUM, "host1:2181");
desc.setInfo(OptionKeys.APPLICATION_NAME, "HBASE");
expect(access.getClusterStatus()).andReturn(desc).anyTimes();
@@ -588,7 +708,9 @@
ConfTreeOperations treeOps = aggConf.getAppConfOperations();
treeOps.getOrAddComponent("HBASE_MASTER").put(AgentKeys.WAIT_HEARTBEAT, "0");
treeOps.getOrAddComponent("HBASE_REGIONSERVER").put(AgentKeys.WAIT_HEARTBEAT, "0");
+ treeOps.set(OptionKeys.APPLICATION_NAME, "HBASE");
expect(access.getInstanceDefinitionSnapshot()).andReturn(aggConf).anyTimes();
+ expect(access.getInternalsSnapshot()).andReturn(treeOps).anyTimes();
replay(access, ctx, container, sliderFileSystem);
// build two containers
@@ -665,7 +787,8 @@
Mockito.verify(mockAps, Mockito.times(0)).addStartCommand(anyString(),
anyString(),
any(HeartBeatResponse.class),
- anyString());
+ anyString(),
+ Matchers.anyBoolean());
// RS still does not start
hb = new HeartBeat();
hb.setResponseId(3);
@@ -675,7 +798,8 @@
Mockito.verify(mockAps, Mockito.times(0)).addStartCommand(anyString(),
anyString(),
any(HeartBeatResponse.class),
- anyString());
+ anyString(),
+ Matchers.anyBoolean());
// MASTER succeeds install and issues start
hb = new HeartBeat();
@@ -685,7 +809,7 @@
cr.setRole("HBASE_MASTER");
cr.setRoleCommand("INSTALL");
cr.setStatus("COMPLETED");
- Map<String, String> ap = new HashMap<>();
+ Map<String, String> ap = new HashMap<String, String>();
ap.put("a.port", "10233");
cr.setAllocatedPorts(ap);
hb.setReports(Arrays.asList(cr));
@@ -694,7 +818,8 @@
Mockito.verify(mockAps, Mockito.times(1)).addStartCommand(anyString(),
anyString(),
any(HeartBeatResponse.class),
- anyString());
+ anyString(),
+ Matchers.anyBoolean());
Map<String, String> allocatedPorts = mockAps.getAllocatedPorts();
Assert.assertTrue(allocatedPorts != null);
Assert.assertTrue(allocatedPorts.size() == 1);
@@ -709,7 +834,8 @@
Mockito.verify(mockAps, Mockito.times(1)).addStartCommand(anyString(),
anyString(),
any(HeartBeatResponse.class),
- anyString());
+ anyString(),
+ Matchers.anyBoolean());
// MASTER succeeds start
hb = new HeartBeat();
hb.setResponseId(3);
@@ -733,18 +859,110 @@
Mockito.verify(mockAps, Mockito.times(2)).addStartCommand(anyString(),
anyString(),
any(HeartBeatResponse.class),
- anyString());
- } catch (SliderException | IOException he) {
+ anyString(),
+ Matchers.anyBoolean());
+ // JDK7
+ } catch (SliderException he) {
+ log.warn(he.getMessage());
+ } catch (IOException he) {
log.warn(he.getMessage());
}
- Mockito.verify(mockAps, Mockito.times(1)).publishComponentConfiguration(
+ Mockito.verify(mockAps, Mockito.times(1)).publishApplicationInstanceData(
anyString(),
anyString(),
anyCollection());
}
@Test
+ public void testNotifyContainerCompleted() {
+ AgentProviderService aps = new AgentProviderService();
+ AgentProviderService mockAps = Mockito.spy(aps);
+ doNothing().when(mockAps).publishApplicationInstanceData(anyString(), anyString(), anyCollection());
+
+ ContainerId cid = new MockContainerId(1);
+ String id = cid.toString();
+ ContainerId cid2 = new MockContainerId(2);
+ mockAps.getAllocatedPorts().put("a", "100");
+ mockAps.getAllocatedPorts(id).put("b", "101");
+ mockAps.getAllocatedPorts("cid2").put("c", "102");
+
+ mockAps.getComponentInstanceData().put("cid2", new HashMap<String, String>());
+ mockAps.getComponentInstanceData().put(id, new HashMap<String, String>());
+
+ mockAps.getComponentStatuses().put("cid2_HM", new ComponentInstanceState("HM", cid2, "aid"));
+ mockAps.getComponentStatuses().put(id + "_HM", new ComponentInstanceState("HM", cid, "aid"));
+
+ Assert.assertNotNull(mockAps.getComponentInstanceData().get(id));
+ Assert.assertNotNull(mockAps.getComponentInstanceData().get("cid2"));
+
+ Assert.assertNotNull(mockAps.getComponentStatuses().get(id + "_HM"));
+ Assert.assertNotNull(mockAps.getComponentStatuses().get("cid2_HM"));
+
+ Assert.assertEquals(mockAps.getAllocatedPorts().size(), 1);
+ Assert.assertEquals(mockAps.getAllocatedPorts(id).size(), 1);
+ Assert.assertEquals(mockAps.getAllocatedPorts("cid2").size(), 1);
+
+ // Make the call
+ mockAps.notifyContainerCompleted(new MockContainerId(1));
+
+ Assert.assertEquals(mockAps.getAllocatedPorts().size(), 1);
+ Assert.assertEquals(mockAps.getAllocatedPorts(id).size(), 0);
+ Assert.assertEquals(mockAps.getAllocatedPorts("cid2").size(), 1);
+
+ Assert.assertNull(mockAps.getComponentInstanceData().get(id));
+ Assert.assertNotNull(mockAps.getComponentInstanceData().get("cid2"));
+
+ Assert.assertNull(mockAps.getComponentStatuses().get(id + "_HM"));
+ Assert.assertNotNull(mockAps.getComponentStatuses().get("cid2_HM"));
+ }
+
+ @Test
+ public void testAddInstallCommand() throws Exception {
+ InputStream metainfo_1 = new ByteArrayInputStream(metainfo_1_str.getBytes());
+ Metainfo metainfo = new MetainfoParser().parse(metainfo_1);
+ AgentProviderService aps = new AgentProviderService();
+ HeartBeatResponse hbr = new HeartBeatResponse();
+
+ StateAccessForProviders access = createNiceMock(StateAccessForProviders.class);
+ AgentProviderService mockAps = Mockito.spy(aps);
+ doReturn(access).when(mockAps).getAmState();
+
+ AggregateConf aggConf = new AggregateConf();
+ ConfTreeOperations treeOps = aggConf.getAppConfOperations();
+ treeOps.getGlobalOptions().put(AgentKeys.JAVA_HOME, "java_home");
+ treeOps.set(OptionKeys.APPLICATION_NAME, "HBASE");
+ treeOps.set("site.fs.defaultFS", "hdfs://HOST1:8020/");
+ treeOps.set("internal.data.dir.path", "hdfs://HOST1:8020/database");
+ treeOps.set(OptionKeys.ZOOKEEPER_HOSTS, "HOST1");
+
+ expect(access.getAppConfSnapshot()).andReturn(treeOps).anyTimes();
+ expect(access.getInternalsSnapshot()).andReturn(treeOps).anyTimes();
+ expect(access.isApplicationLive()).andReturn(true).anyTimes();
+
+ doReturn("HOST1").when(mockAps).getClusterInfoPropertyValue(anyString());
+ doReturn(metainfo).when(mockAps).getMetainfo();
+
+ Map<String, Map<String, ClusterNode>> roleClusterNodeMap = new HashMap<String, Map<String, ClusterNode>>();
+ Map<String, ClusterNode> container = new HashMap<String, ClusterNode>();
+ ClusterNode cn1 = new ClusterNode(new MockContainerId(1));
+ cn1.host = "HOST1";
+ container.put("cid1", cn1);
+ roleClusterNodeMap.put("HBASE_MASTER", container);
+ doReturn(roleClusterNodeMap).when(mockAps).getRoleClusterNodeMapping();
+
+ replay(access);
+
+ mockAps.addInstallCommand("HBASE_MASTER", "cid1", hbr, "");
+ ExecutionCommand cmd = hbr.getExecutionCommands().get(0);
+ String pkgs = cmd.getHostLevelParams().get(AgentKeys.PACKAGE_LIST);
+ Assert.assertEquals("[{\"type\":\"tarball\",\"name\":\"files/hbase-0.96.1-hadoop2-bin.tar.gz\"}]", pkgs);
+ Assert.assertEquals("java_home", cmd.getHostLevelParams().get(AgentKeys.JAVA_HOME));
+ Assert.assertEquals("cid1", cmd.getHostLevelParams().get("container_id"));
+ Assert.assertEquals(Command.INSTALL.toString(), cmd.getRoleCommand());
+ }
+
+ @Test
public void testAddStartCommand() throws Exception {
AgentProviderService aps = new AgentProviderService();
HeartBeatResponse hbr = new HeartBeatResponse();
@@ -763,6 +981,8 @@
treeOps.set("config_types", "hbase-site");
treeOps.getGlobalOptions().put("site.hbase-site.a.port", "${HBASE_MASTER.ALLOCATED_PORT}");
treeOps.getGlobalOptions().put("site.hbase-site.b.port", "${HBASE_MASTER.ALLOCATED_PORT}");
+ treeOps.getGlobalOptions().put("site.hbase-site.random.port", "${HBASE_MASTER.ALLOCATED_PORT}{DO_NOT_PROPAGATE}");
+ treeOps.getGlobalOptions().put("site.hbase-site.random2.port", "${HBASE_MASTER.ALLOCATED_PORT}");
expect(access.getAppConfSnapshot()).andReturn(treeOps).anyTimes();
expect(access.getInternalsSnapshot()).andReturn(treeOps).anyTimes();
@@ -770,140 +990,32 @@
doReturn("HOST1").when(mockAps).getClusterInfoPropertyValue(anyString());
- Map<String, Map<String, ClusterNode>> roleClusterNodeMap = new HashMap<>();
- Map<String, ClusterNode> container = new HashMap<>();
- ClusterNode cn1 = new ClusterNode(new MyContainerId(1));
+ Map<String, Map<String, ClusterNode>> roleClusterNodeMap = new HashMap<String, Map<String, ClusterNode>>();
+ Map<String, ClusterNode> container = new HashMap<String, ClusterNode>();
+ ClusterNode cn1 = new ClusterNode(new MockContainerId(1));
cn1.host = "HOST1";
container.put("cid1", cn1);
roleClusterNodeMap.put("HBASE_MASTER", container);
doReturn(roleClusterNodeMap).when(mockAps).getRoleClusterNodeMapping();
- Map<String, String> allocatedPorts = new HashMap<>();
- allocatedPorts.put("a.port", "10023");
- allocatedPorts.put("b.port", "10024");
+ Map<String, String> allocatedPorts = new HashMap<String, String>();
+ allocatedPorts.put("hbase-site.a.port", "10023");
+ allocatedPorts.put("hbase-site.b.port", "10024");
doReturn(allocatedPorts).when(mockAps).getAllocatedPorts();
+ Map<String, String> allocatedPorts2 = new HashMap<String, String>();
+ allocatedPorts2.put("hbase-site.random.port", "10025");
+ doReturn(allocatedPorts2).when(mockAps).getAllocatedPorts(anyString());
replay(access);
- mockAps.addStartCommand("HBASE_MASTER", "cid1", hbr, "");
+ mockAps.addStartCommand("HBASE_MASTER", "cid1", hbr, "", Boolean.FALSE);
Assert.assertTrue(hbr.getExecutionCommands().get(0).getConfigurations().containsKey("hbase-site"));
Map<String, String> hbaseSiteConf = hbr.getExecutionCommands().get(0).getConfigurations().get("hbase-site");
Assert.assertTrue(hbaseSiteConf.containsKey("a.port"));
- Assert.assertTrue(hbaseSiteConf.get("a.port").equals("10023"));
- Assert.assertTrue(hbaseSiteConf.get("b.port").equals("10024"));
+ Assert.assertEquals("10023", hbaseSiteConf.get("a.port"));
+ Assert.assertEquals("10024", hbaseSiteConf.get("b.port"));
+ Assert.assertEquals("10025", hbaseSiteConf.get("random.port"));
+ assertEquals("${HBASE_MASTER.ALLOCATED_PORT}",
+ hbaseSiteConf.get("random2.port"));
}
- private static class MyContainer extends Container {
-
- ContainerId cid = null;
-
- @Override
- public ContainerId getId() {
- return this.cid;
- }
-
- @Override
- public void setId(ContainerId containerId) {
- this.cid = containerId;
- }
-
- @Override
- public NodeId getNodeId() {
- return null; //To change body of implemented methods use File | Settings | File Templates.
- }
-
- @Override
- public void setNodeId(NodeId nodeId) {
- //To change body of implemented methods use File | Settings | File Templates.
- }
-
- @Override
- public String getNodeHttpAddress() {
- return null; //To change body of implemented methods use File | Settings | File Templates.
- }
-
- @Override
- public void setNodeHttpAddress(String s) {
- //To change body of implemented methods use File | Settings | File Templates.
- }
-
- @Override
- public Resource getResource() {
- return null; //To change body of implemented methods use File | Settings | File Templates.
- }
-
- @Override
- public void setResource(Resource resource) {
- //To change body of implemented methods use File | Settings | File Templates.
- }
-
- @Override
- public Priority getPriority() {
- return null; //To change body of implemented methods use File | Settings | File Templates.
- }
-
- @Override
- public void setPriority(Priority priority) {
- //To change body of implemented methods use File | Settings | File Templates.
- }
-
- @Override
- public Token getContainerToken() {
- return null; //To change body of implemented methods use File | Settings | File Templates.
- }
-
- @Override
- public void setContainerToken(Token token) {
- //To change body of implemented methods use File | Settings | File Templates.
- }
-
- @Override
- public int compareTo(Container o) {
- return 0; //To change body of implemented methods use File | Settings | File Templates.
- }
- }
-
- private static class MyContainerId extends ContainerId {
- int id;
-
- private MyContainerId(int id) {
- this.id = id;
- }
-
- @Override
- public ApplicationAttemptId getApplicationAttemptId() {
- return null; //To change body of implemented methods use File | Settings | File Templates.
- }
-
- @Override
- protected void setApplicationAttemptId(ApplicationAttemptId applicationAttemptId) {
- //To change body of implemented methods use File | Settings | File Templates.
- }
-
- @Override
- public int getId() {
- return id; //To change body of implemented methods use File | Settings | File Templates.
- }
-
- @Override
- protected void setId(int i) {
- //To change body of implemented methods use File | Settings | File Templates.
- }
-
- @Override
- protected void build() {
- //To change body of implemented methods use File | Settings | File Templates.
- }
-
- @Override
- public int hashCode() {
- return this.id;
- }
-
- @Override
- public String toString() {
- return "MyContainerId{" +
- "id=" + id +
- '}';
- }
- }
}
diff --git a/slider-core/src/test/java/org/apache/slider/providers/agent/TestComponentCommandOrder.java b/slider-core/src/test/java/org/apache/slider/providers/agent/TestComponentCommandOrder.java
index 3ef1839..c123fbb 100644
--- a/slider-core/src/test/java/org/apache/slider/providers/agent/TestComponentCommandOrder.java
+++ b/slider-core/src/test/java/org/apache/slider/providers/agent/TestComponentCommandOrder.java
@@ -19,6 +19,7 @@
package org.apache.slider.providers.agent;
import org.apache.slider.providers.agent.application.metadata.CommandOrder;
+import org.apache.slider.server.appmaster.model.mock.MockContainerId;
import org.junit.Assert;
import org.junit.Test;
import org.slf4j.Logger;
@@ -29,6 +30,7 @@
public class TestComponentCommandOrder {
protected static final Logger log =
LoggerFactory.getLogger(TestComponentCommandOrder.class);
+ private final MockContainerId containerId = new MockContainerId(1);
@Test
public void testComponentCommandOrder() throws Exception {
@@ -43,11 +45,12 @@
co3.setRequires("C-STARTED,D-STARTED,E-INSTALLED");
ComponentCommandOrder cco = new ComponentCommandOrder(Arrays.asList(co1, co2, co3));
- ComponentInstanceState cisB = new ComponentInstanceState("B", "cid", "aid");
- ComponentInstanceState cisC = new ComponentInstanceState("C", "cid", "aid");
- ComponentInstanceState cisD = new ComponentInstanceState("D", "cid", "aid");
- ComponentInstanceState cisE = new ComponentInstanceState("E", "cid", "aid");
- ComponentInstanceState cisE2 = new ComponentInstanceState("E", "cid", "aid");
+ ComponentInstanceState cisB = new ComponentInstanceState("B",
+ containerId, "aid");
+ ComponentInstanceState cisC = new ComponentInstanceState("C", containerId, "aid");
+ ComponentInstanceState cisD = new ComponentInstanceState("D", containerId, "aid");
+ ComponentInstanceState cisE = new ComponentInstanceState("E", containerId, "aid");
+ ComponentInstanceState cisE2 = new ComponentInstanceState("E", containerId, "aid");
cisB.setState(State.STARTED);
cisC.setState(State.INSTALLED);
Assert.assertTrue(cco.canExecute("A", Command.START, Arrays.asList(cisB)));
@@ -92,8 +95,8 @@
co.setCommand(" A-START");
co.setRequires("B-STARTED , C-STARTED");
- ComponentInstanceState cisB = new ComponentInstanceState("B", "cid", "aid");
- ComponentInstanceState cisC = new ComponentInstanceState("C", "cid", "aid");
+ ComponentInstanceState cisB = new ComponentInstanceState("B", containerId, "aid");
+ ComponentInstanceState cisC = new ComponentInstanceState("C", containerId, "aid");
cisB.setState(State.STARTED);
cisC.setState(State.STARTED);
diff --git a/slider-core/src/test/java/org/apache/slider/providers/agent/TestComponentInstanceState.java b/slider-core/src/test/java/org/apache/slider/providers/agent/TestComponentInstanceState.java
index be9f178..a723394 100644
--- a/slider-core/src/test/java/org/apache/slider/providers/agent/TestComponentInstanceState.java
+++ b/slider-core/src/test/java/org/apache/slider/providers/agent/TestComponentInstanceState.java
@@ -19,6 +19,7 @@
package org.apache.slider.providers.agent;
import junit.framework.TestCase;
+import org.apache.slider.server.appmaster.model.mock.MockContainerId;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -29,6 +30,7 @@
private State[] states = new State[]{
State.INIT, State.INSTALLING, State.INSTALLED,
State.STARTING, State.STARTED, State.INSTALL_FAILED};
+ private final MockContainerId containerId = new MockContainerId(1);
@Test
public void testValidateSupportedCommands() {
@@ -42,7 +44,7 @@
}
@Test
- public void testGetNextStateBasedOnResult() {
+ public void testGetNextStateBasedOnResult() throws Exception {
TestCase.assertEquals(State.INSTALLING, State.INSTALLING.getNextState(CommandResult.IN_PROGRESS));
TestCase.assertEquals(State.STARTING, State.STARTING.getNextState(CommandResult.IN_PROGRESS));
expectExceptionOnGetNextForResult(IllegalArgumentException.class, State.INIT, CommandResult.IN_PROGRESS);
@@ -66,98 +68,109 @@
}
@Test
- public void testGetNextStateBasedOnCommand() {
- for (int index = 0; index < states.length; index++) {
- TestCase.assertEquals(states[index], states[index].getNextState(Command.NOP));
+ public void testGetNextStateBasedOnCommand() throws Exception {
+ for (State state : states) {
+ TestCase.assertEquals(state, state.getNextState(Command.NOP));
}
TestCase.assertEquals(State.INSTALLING, State.INIT.getNextState(Command.INSTALL));
TestCase.assertEquals(State.INSTALLING, State.INSTALL_FAILED.getNextState(Command.INSTALL));
- expectExceptionOnGetNextForCommand(IllegalArgumentException.class, State.INSTALLED, Command.INSTALL);
- expectExceptionOnGetNextForCommand(IllegalArgumentException.class, State.STARTING, Command.INSTALL);
- expectExceptionOnGetNextForCommand(IllegalArgumentException.class, State.STARTED, Command.INSTALL);
+ expectIllegalArgumentException(State.INSTALLED, Command.INSTALL);
+ expectIllegalArgumentException(State.STARTING, Command.INSTALL);
+ expectIllegalArgumentException(State.STARTED, Command.INSTALL);
TestCase.assertEquals(State.STARTING, State.INSTALLED.getNextState(Command.START));
- expectExceptionOnGetNextForCommand(IllegalArgumentException.class, State.INIT, Command.START);
- expectExceptionOnGetNextForCommand(IllegalArgumentException.class, State.INSTALL_FAILED, Command.START);
- expectExceptionOnGetNextForCommand(IllegalArgumentException.class, State.STARTING, Command.START);
- expectExceptionOnGetNextForCommand(IllegalArgumentException.class, State.INSTALLING, Command.START);
- expectExceptionOnGetNextForCommand(IllegalArgumentException.class, State.STARTED, Command.START);
+ expectIllegalArgumentException(State.INIT, Command.START);
+ expectIllegalArgumentException(State.INSTALL_FAILED, Command.START);
+ expectIllegalArgumentException(State.STARTING, Command.START);
+ expectIllegalArgumentException(State.INSTALLING, Command.START);
+ expectIllegalArgumentException(State.STARTED, Command.START);
+ }
+
+ protected void expectIllegalArgumentException(State state, Command command) throws
+ Exception {
+ expectExceptionOnGetNextForCommand(IllegalArgumentException.class,
+ state, command);
}
@Test
public void validateStateTransitionNormal() {
- ComponentInstanceState componentInstanceState = new ComponentInstanceState("HBASE_MASTER", "CID_001", "AID_001");
- TestCase.assertEquals(State.INIT, componentInstanceState.getState());
+ ComponentInstanceState componentInstanceState = new ComponentInstanceState("HBASE_MASTER", containerId, "AID_001");
+ assertInState(State.INIT, componentInstanceState);
TestCase.assertEquals(true, componentInstanceState.hasPendingCommand());
TestCase.assertEquals(Command.INSTALL, componentInstanceState.getNextCommand());
- TestCase.assertEquals(State.INIT, componentInstanceState.getState());
+ assertInState(State.INIT, componentInstanceState);
componentInstanceState.commandIssued(Command.INSTALL);
- TestCase.assertEquals(State.INSTALLING, componentInstanceState.getState());
+ assertInState(State.INSTALLING, componentInstanceState);
componentInstanceState.applyCommandResult(CommandResult.IN_PROGRESS, Command.INSTALL);
- TestCase.assertEquals(State.INSTALLING, componentInstanceState.getState());
+ assertInState(State.INSTALLING, componentInstanceState);
componentInstanceState.applyCommandResult(CommandResult.COMPLETED, Command.INSTALL);
- TestCase.assertEquals(State.INSTALLED, componentInstanceState.getState());
+ assertInState(State.INSTALLED, componentInstanceState);
TestCase.assertEquals(Command.START, componentInstanceState.getNextCommand());
componentInstanceState.commandIssued(Command.START);
- TestCase.assertEquals(State.STARTING, componentInstanceState.getState());
+ assertInState(State.STARTING, componentInstanceState);
componentInstanceState.applyCommandResult(CommandResult.IN_PROGRESS, Command.START);
- TestCase.assertEquals(State.STARTING, componentInstanceState.getState());
+ assertInState(State.STARTING, componentInstanceState);
componentInstanceState.applyCommandResult(CommandResult.COMPLETED, Command.START);
- TestCase.assertEquals(State.STARTED, componentInstanceState.getState());
+ assertInState(State.STARTED, componentInstanceState);
+ }
+
+ protected void assertInState(State state,
+ ComponentInstanceState componentInstanceState) {
+ TestCase.assertEquals(state, componentInstanceState.getState());
}
@Test
public void validateStateTransitionScenario2() {
- ComponentInstanceState componentInstanceState = new ComponentInstanceState("HBASE_MASTER", "CID_001", "AID_001");
- TestCase.assertEquals(State.INIT, componentInstanceState.getState());
+ ComponentInstanceState componentInstanceState = new ComponentInstanceState("HBASE_MASTER", containerId, "AID_001");
+ assertInState(State.INIT, componentInstanceState);
TestCase.assertEquals(true, componentInstanceState.hasPendingCommand());
TestCase.assertEquals(Command.INSTALL, componentInstanceState.getNextCommand());
- TestCase.assertEquals(State.INIT, componentInstanceState.getState());
+ assertInState(State.INIT, componentInstanceState);
componentInstanceState.commandIssued(Command.INSTALL);
- TestCase.assertEquals(State.INSTALLING, componentInstanceState.getState());
+ assertInState(State.INSTALLING, componentInstanceState);
componentInstanceState.applyCommandResult(CommandResult.FAILED, Command.INSTALL);
- TestCase.assertEquals(State.INSTALL_FAILED, componentInstanceState.getState());
+ assertInState(State.INSTALL_FAILED, componentInstanceState);
componentInstanceState.commandIssued(Command.INSTALL);
- TestCase.assertEquals(State.INSTALLING, componentInstanceState.getState());
+ assertInState(State.INSTALLING, componentInstanceState);
componentInstanceState.applyCommandResult(CommandResult.COMPLETED, Command.INSTALL);
- TestCase.assertEquals(State.INSTALLED, componentInstanceState.getState());
+ assertInState(State.INSTALLED, componentInstanceState);
TestCase.assertEquals(Command.START, componentInstanceState.getNextCommand());
componentInstanceState.commandIssued(Command.START);
- TestCase.assertEquals(State.STARTING, componentInstanceState.getState());
+ assertInState(State.STARTING, componentInstanceState);
componentInstanceState.applyCommandResult(CommandResult.FAILED, Command.START);
- TestCase.assertEquals(State.INSTALLED, componentInstanceState.getState());
+ assertInState(State.INSTALLED, componentInstanceState);
componentInstanceState.commandIssued(Command.START);
componentInstanceState.applyCommandResult(CommandResult.COMPLETED, Command.START);
- TestCase.assertEquals(State.STARTED, componentInstanceState.getState());
+ assertInState(State.STARTED, componentInstanceState);
}
@Test
public void tolerateMaxFailures() {
- ComponentInstanceState componentInstanceState = new ComponentInstanceState("HBASE_MASTER", "CID_001", "AID_001");
- TestCase.assertEquals(State.INIT, componentInstanceState.getState());
+ ComponentInstanceState componentInstanceState = new ComponentInstanceState("HBASE_MASTER", containerId, "AID_001");
+ assertInState(State.INIT, componentInstanceState);
TestCase.assertEquals(true, componentInstanceState.hasPendingCommand());
TestCase.assertEquals(Command.INSTALL, componentInstanceState.getNextCommand());
- TestCase.assertEquals(State.INIT, componentInstanceState.getState());
+ assertInState(State.INIT, componentInstanceState);
componentInstanceState.commandIssued(Command.INSTALL);
- TestCase.assertEquals(State.INSTALLING, componentInstanceState.getState());
+ assertInState(State.INSTALLING, componentInstanceState);
componentInstanceState.applyCommandResult(CommandResult.FAILED, Command.INSTALL);
- TestCase.assertEquals(State.INSTALL_FAILED, componentInstanceState.getState());
+ assertInState(State.INSTALL_FAILED, componentInstanceState);
componentInstanceState.commandIssued(Command.INSTALL);
- TestCase.assertEquals(State.INSTALLING, componentInstanceState.getState());
+ assertInState(State.INSTALLING, componentInstanceState);
componentInstanceState.applyCommandResult(CommandResult.FAILED, Command.INSTALL);
- TestCase.assertEquals(State.INSTALL_FAILED, componentInstanceState.getState());
+ assertInState(State.INSTALL_FAILED, componentInstanceState);
componentInstanceState.commandIssued(Command.INSTALL);
- TestCase.assertEquals(State.INSTALLING, componentInstanceState.getState());
+ assertInState(State.INSTALLING, componentInstanceState);
componentInstanceState.applyCommandResult(CommandResult.FAILED, Command.INSTALL);
- TestCase.assertEquals(State.INSTALL_FAILED, componentInstanceState.getState());
+ assertInState(State.INSTALL_FAILED, componentInstanceState);
try {
componentInstanceState.commandIssued(Command.INSTALL);
@@ -168,41 +181,41 @@
@Test
public void tolerateFewFailureThenReset() {
- ComponentInstanceState componentInstanceState = new ComponentInstanceState("HBASE_MASTER", "CID_001", "AID_001");
- TestCase.assertEquals(State.INIT, componentInstanceState.getState());
+ ComponentInstanceState componentInstanceState = new ComponentInstanceState("HBASE_MASTER", containerId, "AID_001");
+ assertInState(State.INIT, componentInstanceState);
TestCase.assertEquals(true, componentInstanceState.hasPendingCommand());
TestCase.assertEquals(Command.INSTALL, componentInstanceState.getNextCommand());
- TestCase.assertEquals(State.INIT, componentInstanceState.getState());
+ assertInState(State.INIT, componentInstanceState);
componentInstanceState.commandIssued(Command.INSTALL);
- TestCase.assertEquals(State.INSTALLING, componentInstanceState.getState());
+ assertInState(State.INSTALLING, componentInstanceState);
componentInstanceState.applyCommandResult(CommandResult.FAILED, Command.INSTALL);
- TestCase.assertEquals(State.INSTALL_FAILED, componentInstanceState.getState());
+ assertInState(State.INSTALL_FAILED, componentInstanceState);
componentInstanceState.commandIssued(Command.INSTALL);
- TestCase.assertEquals(State.INSTALLING, componentInstanceState.getState());
+ assertInState(State.INSTALLING, componentInstanceState);
componentInstanceState.applyCommandResult(CommandResult.FAILED, Command.INSTALL);
- TestCase.assertEquals(State.INSTALL_FAILED, componentInstanceState.getState());
+ assertInState(State.INSTALL_FAILED, componentInstanceState);
componentInstanceState.commandIssued(Command.INSTALL);
- TestCase.assertEquals(State.INSTALLING, componentInstanceState.getState());
+ assertInState(State.INSTALLING, componentInstanceState);
componentInstanceState.applyCommandResult(CommandResult.COMPLETED, Command.INSTALL);
- TestCase.assertEquals(State.INSTALLED, componentInstanceState.getState());
+ assertInState(State.INSTALLED, componentInstanceState);
componentInstanceState.commandIssued(Command.START);
- TestCase.assertEquals(State.STARTING, componentInstanceState.getState());
+ assertInState(State.STARTING, componentInstanceState);
componentInstanceState.applyCommandResult(CommandResult.FAILED, Command.START);
- TestCase.assertEquals(State.INSTALLED, componentInstanceState.getState());
+ assertInState(State.INSTALLED, componentInstanceState);
componentInstanceState.commandIssued(Command.START);
- TestCase.assertEquals(State.STARTING, componentInstanceState.getState());
+ assertInState(State.STARTING, componentInstanceState);
componentInstanceState.applyCommandResult(CommandResult.FAILED, Command.START);
- TestCase.assertEquals(State.INSTALLED, componentInstanceState.getState());
+ assertInState(State.INSTALLED, componentInstanceState);
componentInstanceState.commandIssued(Command.START);
- TestCase.assertEquals(State.STARTING, componentInstanceState.getState());
+ assertInState(State.STARTING, componentInstanceState);
componentInstanceState.applyCommandResult(CommandResult.FAILED, Command.START);
- TestCase.assertEquals(State.INSTALLED, componentInstanceState.getState());
+ assertInState(State.INSTALLED, componentInstanceState);
try {
componentInstanceState.commandIssued(Command.START);
@@ -213,7 +226,7 @@
@Test
public void testBadTransitions() {
- ComponentInstanceState componentInstanceState = new ComponentInstanceState("HBASE_MASTER", "CID_001", "AID_001");
+ ComponentInstanceState componentInstanceState = new ComponentInstanceState("HBASE_MASTER", containerId, "AID_001");
try {
componentInstanceState.commandIssued(Command.START);
@@ -262,25 +275,25 @@
}
private <T extends Throwable> void expectExceptionOnGetNextForResult(
- Class<T> expected, State state, CommandResult result) {
+ Class<T> expected, State state, CommandResult result) throws Exception {
try {
state.getNextState(result);
TestCase.fail("Must fail");
} catch (Exception e) {
if (!expected.isInstance(e)) {
- TestCase.fail("Unexpected exception " + e.getClass());
+ throw e;
}
}
}
private <T extends Throwable> void expectExceptionOnGetNextForCommand(
- Class<T> expected, State state, Command command) {
+ Class<T> expected, State state, Command command) throws Exception {
try {
state.getNextState(command);
TestCase.fail("Must fail");
} catch (Exception e) {
if (!expected.isInstance(e)) {
- TestCase.fail("Unexpected exception " + e.getClass());
+ throw e;
}
}
}
diff --git a/slider-core/src/test/java/org/apache/slider/providers/agent/TestHeartbeatMonitor.java b/slider-core/src/test/java/org/apache/slider/providers/agent/TestHeartbeatMonitor.java
index c2cfafd..7314b72 100644
--- a/slider-core/src/test/java/org/apache/slider/providers/agent/TestHeartbeatMonitor.java
+++ b/slider-core/src/test/java/org/apache/slider/providers/agent/TestHeartbeatMonitor.java
@@ -16,6 +16,8 @@
*/
package org.apache.slider.providers.agent;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.slider.server.appmaster.model.mock.MockContainerId;
import org.junit.Assert;
import org.junit.Test;
import org.slf4j.Logger;
@@ -54,10 +56,12 @@
AgentProviderService provider = createNiceMock(AgentProviderService.class);
HeartbeatMonitor hbm = new HeartbeatMonitor(provider, 500);
Assert.assertFalse(hbm.isAlive());
- Map<String, ComponentInstanceState> statuses = new HashMap<>();
- ComponentInstanceState state = new ComponentInstanceState("HBASE_MASTER", "Cid", "Aid");
+ Map<String, ComponentInstanceState> statuses = new HashMap<String, ComponentInstanceState>();
+ ContainerId container1 = new MockContainerId(1);
+ ComponentInstanceState state = new ComponentInstanceState("HBASE_MASTER",
+ container1, "Aid");
state.setState(State.STARTED);
- state.setLastHeartbeat(System.currentTimeMillis());
+ state.heartbeat(System.currentTimeMillis());
statuses.put("label_1", state);
expect(provider.getComponentStatuses()).andReturn(statuses).anyTimes();
replay(provider);
@@ -72,65 +76,95 @@
@Test
public void testHeartbeatMonitorWithUnhealthyAndThenLost() throws Exception {
AgentProviderService provider = createNiceMock(AgentProviderService.class);
- HeartbeatMonitor hbm = new HeartbeatMonitor(provider, 2 * 1000);
- Assert.assertFalse(hbm.isAlive());
- Map<String, ComponentInstanceState> statuses = new HashMap<>();
- ComponentInstanceState masterState = new ComponentInstanceState("HBASE_MASTER", "Cid1", "Aid1");
+ long now = 100000;
+ int wakeupInterval = 2 * 1000;
+
+ Map<String, ComponentInstanceState> statuses = new HashMap<String, ComponentInstanceState>();
+ ContainerId masterContainer = new MockContainerId(1);
+ ContainerId slaveContainer = new MockContainerId(2);
+ ComponentInstanceState masterState = new ComponentInstanceState("HBASE_MASTER",
+ masterContainer, "Aid1");
+ String masterLabel = "Aid1_Cid1_HBASE_MASTER";
+ statuses.put(masterLabel, masterState);
+
+ ComponentInstanceState slaveState = new ComponentInstanceState("HBASE_REGIONSERVER",
+ slaveContainer, "Aid1");
+ String slaveLabel = "Aid1_Cid2_HBASE_REGIONSERVER";
+ statuses.put(slaveLabel, slaveState);
+
masterState.setState(State.STARTED);
- masterState.setLastHeartbeat(System.currentTimeMillis());
- statuses.put("Aid1_Cid1_HBASE_MASTER", masterState);
-
- ComponentInstanceState slaveState = new ComponentInstanceState("HBASE_REGIONSERVER", "Cid2", "Aid1");
+ masterState.heartbeat(now);
slaveState.setState(State.STARTED);
- slaveState.setLastHeartbeat(System.currentTimeMillis());
- statuses.put("Aid1_Cid2_HBASE_REGIONSERVER", slaveState);
-
+ slaveState.heartbeat(now);
expect(provider.getComponentStatuses()).andReturn(statuses).anyTimes();
- expect(provider.releaseContainer("Aid1_Cid2_HBASE_REGIONSERVER")).andReturn(true).once();
replay(provider);
- hbm.start();
- Thread.sleep(1 * 1000);
+
+ HeartbeatMonitor heartbeatMonitor = new HeartbeatMonitor(provider,
+ wakeupInterval);
+ Assert.assertFalse(heartbeatMonitor.isAlive());
+ now += wakeupInterval;
+ masterState.setState(State.STARTED);
+ masterState.heartbeat(now);
+
+ slaveState.setState(State.STARTED);
// just dial back by at least 2 sec but no more than 4
- slaveState.setLastHeartbeat(System.currentTimeMillis() - (2 * 1000 + 100));
- masterState.setLastHeartbeat(System.currentTimeMillis());
+ slaveState.heartbeat(now - (wakeupInterval + 100));
- Thread.sleep(1 * 1000 + 500);
- masterState.setLastHeartbeat(System.currentTimeMillis());
- log.info("Slave container state {}", slaveState.getContainerState());
- Assert.assertEquals(ContainerState.HEALTHY, masterState.getContainerState());
- Assert.assertEquals(ContainerState.UNHEALTHY, slaveState.getContainerState());
+ assertInState(ContainerState.HEALTHY, masterState, now);
+ assertInState(ContainerState.HEALTHY, slaveState, now);
+
+ //tick #1
+ heartbeatMonitor.doWork(now);
- Thread.sleep(1 * 1000);
- // some lost heartbeats are ignored (e.g. ~ 1 sec)
- masterState.setLastHeartbeat(System.currentTimeMillis() - 1 * 1000);
+ assertInState(ContainerState.HEALTHY, masterState, now);
+ assertInState(ContainerState.UNHEALTHY, slaveState, now);
- Thread.sleep(1 * 1000 + 500);
+ // heartbeat from the master
+ masterState.heartbeat(now + 1500);
- log.info("Slave container state {}", slaveState.getContainerState());
- Assert.assertEquals(ContainerState.HEALTHY, masterState.getContainerState());
- Assert.assertEquals(ContainerState.HEARTBEAT_LOST, slaveState.getContainerState());
- hbm.shutdown();
+ // tick #2
+ now += wakeupInterval;
+ heartbeatMonitor.doWork(now);
+
+ assertInState(ContainerState.HEALTHY, masterState, now);
+ assertInState(ContainerState.HEARTBEAT_LOST, slaveState, now);
+ }
+
+ protected void assertInState(ContainerState expectedState,
+ ComponentInstanceState componentInstanceState, long now) {
+ ContainerState actualState = componentInstanceState.getContainerState();
+ if (!expectedState.equals(actualState)) {
+ // mismatch
+ Assert.fail(String.format("at [%06d] Expected component state %s " +
+ "but found state %s in in component %s",
+ now, expectedState, actualState, componentInstanceState));
+ }
}
@Test
public void testHeartbeatTransitions() {
- ComponentInstanceState slaveState = new ComponentInstanceState("HBASE_REGIONSERVER", "Cid2", "Aid1");
+ ContainerId container2 = new MockContainerId(2);
+ ComponentInstanceState slaveState = new ComponentInstanceState("HBASE_REGIONSERVER",
+ container2, "Aid1");
slaveState.setState(State.STARTED);
- Assert.assertEquals(ContainerState.INIT, slaveState.getContainerState());
- slaveState.setLastHeartbeat(System.currentTimeMillis());
- Assert.assertEquals(ContainerState.HEALTHY, slaveState.getContainerState());
+ long lastHeartbeat = System.currentTimeMillis();
+ assertInState(ContainerState.INIT, slaveState, 0);
+ slaveState.heartbeat(lastHeartbeat);
+ assertInState(ContainerState.HEALTHY, slaveState, lastHeartbeat);
slaveState.setContainerState(ContainerState.UNHEALTHY);
- Assert.assertEquals(ContainerState.UNHEALTHY, slaveState.getContainerState());
- slaveState.setLastHeartbeat(System.currentTimeMillis());
- Assert.assertEquals(ContainerState.HEALTHY, slaveState.getContainerState());
+ lastHeartbeat = System.currentTimeMillis();
+ assertInState(ContainerState.UNHEALTHY, slaveState, lastHeartbeat);
+ slaveState.heartbeat(lastHeartbeat);
+ assertInState(ContainerState.HEALTHY, slaveState, lastHeartbeat);
slaveState.setContainerState(ContainerState.HEARTBEAT_LOST);
- Assert.assertEquals(ContainerState.HEARTBEAT_LOST, slaveState.getContainerState());
- slaveState.setLastHeartbeat(System.currentTimeMillis());
- Assert.assertEquals(ContainerState.HEARTBEAT_LOST, slaveState.getContainerState());
+ assertInState(ContainerState.HEARTBEAT_LOST, slaveState, lastHeartbeat);
+ lastHeartbeat = System.currentTimeMillis();
+ slaveState.heartbeat(lastHeartbeat);
+ assertInState(ContainerState.HEARTBEAT_LOST, slaveState, lastHeartbeat);
}
}
diff --git a/slider-core/src/test/java/org/apache/slider/providers/agent/application/metadata/MetainfoParserTest.java b/slider-core/src/test/java/org/apache/slider/providers/agent/application/metadata/TestMetainfoParser.java
similarity index 87%
rename from slider-core/src/test/java/org/apache/slider/providers/agent/application/metadata/MetainfoParserTest.java
rename to slider-core/src/test/java/org/apache/slider/providers/agent/application/metadata/TestMetainfoParser.java
index 61c53df..98f0afb 100644
--- a/slider-core/src/test/java/org/apache/slider/providers/agent/application/metadata/MetainfoParserTest.java
+++ b/slider-core/src/test/java/org/apache/slider/providers/agent/application/metadata/TestMetainfoParser.java
@@ -27,9 +27,9 @@
/**
*
*/
-public class MetainfoParserTest {
+public class TestMetainfoParser {
protected static final Logger log =
- LoggerFactory.getLogger(MetainfoParserTest.class);
+ LoggerFactory.getLogger(TestMetainfoParser.class);
public static final String METAINFO_XML =
"/org/apache/slider/providers/agent/application/metadata/metainfo.xml";
@@ -52,6 +52,10 @@
for (Component comp : application.getComponents()) {
if (comp != null && comp.getName().equals("NIMBUS")) {
found = true;
+ Assert.assertEquals(0, comp.getComponentExports().size());
+ }
+ if (comp != null && comp.getName().equals("SUPERVISOR")) {
+ Assert.assertEquals(1, comp.getComponentExports().size());
}
}
assert found;
diff --git a/slider-core/src/test/java/org/apache/slider/server/appmaster/web/rest/agent/TestAMAgentWebServices.java b/slider-core/src/test/java/org/apache/slider/server/appmaster/web/rest/agent/TestAMAgentWebServices.java
index 17fbe2b..7e2ab3c 100644
--- a/slider-core/src/test/java/org/apache/slider/server/appmaster/web/rest/agent/TestAMAgentWebServices.java
+++ b/slider-core/src/test/java/org/apache/slider/server/appmaster/web/rest/agent/TestAMAgentWebServices.java
@@ -24,7 +24,6 @@
import com.sun.jersey.api.client.config.ClientConfig;
import com.sun.jersey.api.client.config.DefaultClientConfig;
import com.sun.jersey.api.json.JSONConfiguration;
-import com.sun.jersey.test.framework.JerseyTest;
import junit.framework.Assert;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
@@ -38,12 +37,12 @@
import org.apache.slider.server.appmaster.model.mock.MockSliderClusterProtocol;
import org.apache.slider.server.appmaster.state.AppState;
import org.apache.slider.server.appmaster.state.ProviderAppState;
+import org.apache.slider.server.appmaster.state.SimpleReleaseSelector;
import org.apache.slider.server.appmaster.web.WebAppApi;
import org.apache.slider.server.appmaster.web.WebAppApiImpl;
import org.apache.slider.server.appmaster.web.rest.RestPaths;
import org.apache.slider.server.services.security.CertificateManager;
import org.apache.slider.server.services.security.SecurityUtils;
-import org.apache.slider.test.SliderTestBase;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
@@ -125,11 +124,12 @@
appState.setContainerLimits(RM_MAX_RAM, RM_MAX_CORES);
appState.buildInstance(
factory.newInstanceDefinition(0, 0, 0),
+ new Configuration(),
new Configuration(false),
factory.ROLES,
fs,
historyPath,
- null, null);
+ null, null, new SimpleReleaseSelector());
} catch (Exception e) {
log.error("Failed to set up app {}", e);
}
diff --git a/slider-core/src/test/java/org/apache/slider/server/appmaster/web/rest/management/TestAMManagementWebServices.java b/slider-core/src/test/java/org/apache/slider/server/appmaster/web/rest/management/TestAMManagementWebServices.java
index 134f0bc..91aa2b0 100644
--- a/slider-core/src/test/java/org/apache/slider/server/appmaster/web/rest/management/TestAMManagementWebServices.java
+++ b/slider-core/src/test/java/org/apache/slider/server/appmaster/web/rest/management/TestAMManagementWebServices.java
@@ -45,6 +45,7 @@
import org.apache.slider.server.appmaster.model.mock.MockSliderClusterProtocol;
import org.apache.slider.server.appmaster.state.AppState;
import org.apache.slider.server.appmaster.state.ProviderAppState;
+import org.apache.slider.server.appmaster.state.SimpleReleaseSelector;
import org.apache.slider.server.appmaster.web.WebAppApi;
import org.apache.slider.server.appmaster.web.WebAppApiImpl;
import org.apache.slider.server.appmaster.web.rest.AMWebServices;
@@ -170,12 +171,20 @@
appState.setContainerLimits(RM_MAX_RAM, RM_MAX_CORES);
appState.buildInstance(
factory.newInstanceDefinition(0, 0, 0),
+ new Configuration(),
new Configuration(false),
factory.ROLES,
fs,
historyPath,
- null, null);
- } catch (IOException | BadClusterStateException | URISyntaxException | BadConfigException e) {
+ null, null, new SimpleReleaseSelector());
+// JDK7 } catch (IOException | BadClusterStateException | URISyntaxException | BadConfigException e) {
+ } catch (IOException e) {
+ log.error("{}", e, e);
+ } catch (BadClusterStateException e) {
+ log.error("{}", e, e);
+ } catch (URISyntaxException e) {
+ log.error("{}", e, e);
+ } catch (BadConfigException e) {
log.error("{}", e, e);
}
ProviderAppState providerAppState = new ProviderAppState("undefined",
diff --git a/slider-core/src/test/java/org/apache/slider/server/appmaster/web/rest/publisher/TestAgentProviderService.java b/slider-core/src/test/java/org/apache/slider/server/appmaster/web/rest/publisher/TestAgentProviderService.java
index 2427009..eb368e3 100644
--- a/slider-core/src/test/java/org/apache/slider/server/appmaster/web/rest/publisher/TestAgentProviderService.java
+++ b/slider-core/src/test/java/org/apache/slider/server/appmaster/web/rest/publisher/TestAgentProviderService.java
@@ -16,20 +16,22 @@
*/
package org.apache.slider.server.appmaster.web.rest.publisher;
+import org.apache.hadoop.yarn.api.records.Container;
import org.apache.slider.providers.agent.AgentProviderService;
-import org.apache.slider.server.appmaster.AMViewForProviders;
+import org.apache.slider.server.appmaster.actions.QueueAccess;
import org.apache.slider.server.appmaster.state.StateAccessForProviders;
import org.apache.slider.server.services.registry.RegistryViewForProviders;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
/**
*
*/
-public class TestAgentProviderService extends AgentProviderService{
+public class TestAgentProviderService extends AgentProviderService {
protected static final Logger log =
LoggerFactory.getLogger(TestAgentProviderService.class);
@@ -40,14 +42,16 @@
@Override
public void bind(StateAccessForProviders stateAccessor,
- RegistryViewForProviders reg, AMViewForProviders amView) {
- super.bind(stateAccessor, reg, amView);
- Map<String,String> dummyProps = new HashMap<>();
+ RegistryViewForProviders reg,
+ QueueAccess queueAccess,
+ List<Container> liveContainers) {
+ super.bind(stateAccessor, reg, queueAccess, liveContainers);
+ Map<String,String> dummyProps = new HashMap<String, String>();
dummyProps.put("prop1", "val1");
dummyProps.put("prop2", "val2");
log.info("publishing dummy-site.xml with values {}", dummyProps);
- publishComponentConfiguration("dummy-site", "dummy configuration",
- dummyProps.entrySet());
+ publishApplicationInstanceData("dummy-site", "dummy configuration",
+ dummyProps.entrySet());
}
diff --git a/slider-core/src/test/java/org/apache/slider/server/services/workflow/ProcessCommandFactory.java b/slider-core/src/test/java/org/apache/slider/server/services/workflow/ProcessCommandFactory.java
index e77eeb3..45fdc86 100644
--- a/slider-core/src/test/java/org/apache/slider/server/services/workflow/ProcessCommandFactory.java
+++ b/slider-core/src/test/java/org/apache/slider/server/services/workflow/ProcessCommandFactory.java
@@ -37,7 +37,7 @@
* @return commands
*/
public List<String> ls(File dir) {
- List<String> commands = new ArrayList<>(5);
+ List<String> commands = new ArrayList<String>(5);
commands.add("ls");
commands.add("-1");
commands.add(dir.getAbsolutePath());
@@ -50,7 +50,7 @@
* @return commands
*/
public List<String> echo(String text) {
- List<String> commands = new ArrayList<>(5);
+ List<String> commands = new ArrayList<String>(5);
commands.add("echo");
commands.add(text);
return commands;
@@ -72,7 +72,7 @@
* @return commands
*/
public List<String> exitFalse() {
- List<String> commands = new ArrayList<>(2);
+ List<String> commands = new ArrayList<String>(2);
commands.add("false");
return commands;
}
diff --git a/slider-core/src/test/java/org/apache/slider/server/services/workflow/TestWorkflowExecutorService.java b/slider-core/src/test/java/org/apache/slider/server/services/workflow/TestWorkflowExecutorService.java
index 9514f47..dc160d9 100644
--- a/slider-core/src/test/java/org/apache/slider/server/services/workflow/TestWorkflowExecutorService.java
+++ b/slider-core/src/test/java/org/apache/slider/server/services/workflow/TestWorkflowExecutorService.java
@@ -20,10 +20,14 @@
import org.junit.Test;
+import java.util.concurrent.ExecutorService;
+
+/**
+ * Basic tests for executor service
+ */
public class TestWorkflowExecutorService extends WorkflowServiceTestBase {
-
@Test
public void testAsyncRun() throws Throwable {
@@ -51,7 +55,8 @@
assertNotNull(runnable.getException());
}
- private static class ExecutorSvc extends AbstractWorkflowExecutorService {
+ private static class ExecutorSvc
+ extends WorkflowExecutorService<ExecutorService> {
private ExecutorSvc() {
super("ExecutorService",
ServiceThreadFactory.singleThreadExecutor("test", true));
diff --git a/slider-core/src/test/java/org/apache/slider/test/MiniZooKeeperCluster.java b/slider-core/src/test/java/org/apache/slider/test/MiniZooKeeperCluster.java
index cc2cc9b..d739324 100644
--- a/slider-core/src/test/java/org/apache/slider/test/MiniZooKeeperCluster.java
+++ b/slider-core/src/test/java/org/apache/slider/test/MiniZooKeeperCluster.java
@@ -75,9 +75,9 @@
this.started = false;
this.configuration = configuration;
activeZKServerIndex = -1;
- zooKeeperServers = new ArrayList<>();
- clientPortList = new ArrayList<>();
- standaloneServerFactoryList = new ArrayList<>();
+ zooKeeperServers = new ArrayList<ZooKeeperServer>();
+ clientPortList = new ArrayList<Integer>();
+ standaloneServerFactoryList = new ArrayList<NIOServerCnxnFactory>();
}
public void setDefaultClientPort(int clientPort) {
diff --git a/slider-core/src/test/python/agent/main.py b/slider-core/src/test/python/agent/main.py
index 8b7044e..e50642d 100755
--- a/slider-core/src/test/python/agent/main.py
+++ b/slider-core/src/test/python/agent/main.py
@@ -34,9 +34,8 @@
parser.add_option("--config", dest="conf_folder", help="conf folder")
parser.add_option('--command', dest='command', help='command to execute')
parser.add_option('--label', dest='label', help='label')
- parser.add_option('--host', dest='host', help='port')
- parser.add_option('--port', dest='port', help='host')
- parser.add_option('--secured_port', dest='secured_port', help='host')
+ parser.add_option('--zk-quorum', dest='host:2181', help='zookeeper quorum')
+ parser.add_option('--zk-reg-path', dest='/register/org-apache-slider/cl1', help='zookeeper registry path')
(options, args) = parser.parse_args()
diff --git a/slider-core/src/test/resources/log4j.properties b/slider-core/src/test/resources/log4j.properties
index a552a55..c1a524d 100644
--- a/slider-core/src/test/resources/log4j.properties
+++ b/slider-core/src/test/resources/log4j.properties
@@ -42,7 +42,7 @@
log4j.logger.org.apache.hadoop.hdfs.server.blockmanagement=WARN
log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=WARN
log4j.logger.org.apache.hadoop.hdfs=WARN
-
+log4j.logger.BlockStateChange=WARN
log4j.logger.org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor=WARN
log4j.logger.org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdaterImpl=WARN
diff --git a/slider-core/src/test/resources/org/apache/slider/providers/agent/application/metadata/metainfo.xml b/slider-core/src/test/resources/org/apache/slider/providers/agent/application/metadata/metainfo.xml
index 3d24f96..d9004ad 100644
--- a/slider-core/src/test/resources/org/apache/slider/providers/agent/application/metadata/metainfo.xml
+++ b/slider-core/src/test/resources/org/apache/slider/providers/agent/application/metadata/metainfo.xml
@@ -22,11 +22,67 @@
<name>STORM</name>
<comment>Apache Hadoop Stream processing framework</comment>
<version>0.9.1.2.1</version>
+ <exportedConfigs>storm-site</exportedConfigs>
+
+ <exportGroups>
+ <exportGroup>
+ <name>QuickLinks</name>
+ <exports>
+ <export>
+ <name>app.jmx</name>
+ <value>http://${STORM_REST_API_HOST}:${site.global.rest_api_port}/api/cluster/summary</value>
+ </export>
+ <export>
+ <name>app.monitor</name>
+ <value>http://${STORM_UI_SERVER_HOST}:${site.storm-site.ui.port}</value>
+ </export>
+ <export>
+ <name>app.metrics</name>
+ <value>http://${site.global.ganglia_server_host}/cgi-bin/rrd.py?c=${site.global.ganglia_server_id}</value>
+ </export>
+ <export>
+ <name>ganglia.ui</name>
+ <value>http://${site.global.ganglia_server_host}/ganglia?c=${site.global.ganglia_server_id}</value>
+ </export>
+ <export>
+ <name>nimbus.url</name>
+ <value>http://${NIMBUS_HOST}:${site.storm-site.nimbus.thrift.port}</value>
+ </export>
+ </exports>
+ </exportGroup>
+ </exportGroups>
+
+ <commandOrders>
+ <commandOrder>
+ <command>NIMBUS-START</command>
+ <requires>SUPERVISOR-INSTALLED,STORM_UI_SERVER-INSTALLED,DRPC_SERVER-INSTALLED,STORM_REST_API-INSTALLED
+ </requires>
+ </commandOrder>
+ <commandOrder>
+ <command>SUPERVISOR-START</command>
+ <requires>NIMBUS-STARTED</requires>
+ </commandOrder>
+ <commandOrder>
+ <command>DRPC_SERVER-START</command>
+ <requires>NIMBUS-STARTED</requires>
+ </commandOrder>
+ <commandOrder>
+ <command>STORM_REST_API-START</command>
+ <requires>NIMBUS-STARTED,DRPC_SERVER-STARTED,STORM_UI_SERVER-STARTED</requires>
+ </commandOrder>
+ <commandOrder>
+ <command>STORM_UI_SERVER-START</command>
+ <requires>NIMBUS-STARTED</requires>
+ </commandOrder>
+ </commandOrders>
+
<components>
<component>
<name>NIMBUS</name>
<category>MASTER</category>
+ <autoStartOnFailure>true</autoStartOnFailure>
+ <appExports>QuickLinks-nimbus.url,QuickLinks-ganglia.ui,QuickLinks-app.metrics</appExports>
<commandScript>
<script>scripts/nimbus.py</script>
<scriptType>PYTHON</scriptType>
@@ -37,6 +93,8 @@
<component>
<name>STORM_REST_API</name>
<category>MASTER</category>
+ <autoStartOnFailure>true</autoStartOnFailure>
+ <appExports>QuickLinks-app.jmx</appExports>
<commandScript>
<script>scripts/rest_api.py</script>
<scriptType>PYTHON</scriptType>
@@ -47,6 +105,13 @@
<component>
<name>SUPERVISOR</name>
<category>SLAVE</category>
+ <autoStartOnFailure>true</autoStartOnFailure>
+ <componentExports>
+ <componentExport>
+ <name>log_viewer_port</name>
+ <value>${THIS_HOST}:${site.storm-site.logviewer.port}</value>
+ </componentExport>
+ </componentExports>
<commandScript>
<script>scripts/supervisor.py</script>
<scriptType>PYTHON</scriptType>
@@ -57,6 +122,9 @@
<component>
<name>STORM_UI_SERVER</name>
<category>MASTER</category>
+ <publishConfig>true</publishConfig>
+ <appExports>QuickLinks-app.monitor</appExports>
+ <autoStartOnFailure>true</autoStartOnFailure>
<commandScript>
<script>scripts/ui_server.py</script>
<scriptType>PYTHON</scriptType>
@@ -67,6 +135,7 @@
<component>
<name>DRPC_SERVER</name>
<category>MASTER</category>
+ <autoStartOnFailure>true</autoStartOnFailure>
<commandScript>
<script>scripts/drpc_server.py</script>
<scriptType>PYTHON</scriptType>
@@ -86,10 +155,5 @@
</packages>
</osSpecific>
</osSpecifics>
-
- <configuration-dependencies>
- <config-type>storm-site</config-type>
- <config-type>global</config-type>
- </configuration-dependencies>
</application>
</metainfo>
diff --git a/slider-funtest/pom.xml b/slider-funtest/pom.xml
index cb16669..39d28e0 100644
--- a/slider-funtest/pom.xml
+++ b/slider-funtest/pom.xml
@@ -25,7 +25,7 @@
<parent>
<groupId>org.apache.slider</groupId>
<artifactId>slider</artifactId>
- <version>0.40</version>
+ <version>0.50.0-incubating</version>
</parent>
<properties>
<work.dir>package-tmp</work.dir>
@@ -67,14 +67,6 @@
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
- <version>${maven-compiler-plugin.version}</version>
- <configuration>
- <compilerId>groovy-eclipse-compiler</compilerId>
- <!-- set verbose to be true if you want lots of uninteresting messages -->
- <!-- <verbose>true</verbose> -->
- <source>${project.java.src.version}</source>
- <target>${project.java.src.version}</target>
- </configuration>
<dependencies>
<dependency>
<groupId>org.codehaus.groovy</groupId>
@@ -93,8 +85,17 @@
<!-- test -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-surefire-plugin</artifactId>
- <version>${maven-surefire-plugin.version}</version>
+ <artifactId>maven-failsafe-plugin</artifactId>
+ <version>${maven-failsafe-plugin.version}</version>
+ <executions>
+ <execution>
+ <id>run-integration-tests</id>
+ <goals>
+ <goal>integration-test</goal>
+ <goal>verify</goal>
+ </goals>
+ </execution>
+ </executions>
<configuration>
<!--mvn process fork options-->
<reuseForks>${test.reuseForks}</reuseForks>
@@ -121,12 +122,6 @@
<test.app.resource>../slider-core/src/test/app_packages/test_command_log/resources.json</test.app.resource>
<test.app.template>../slider-core/src/test/app_packages/test_command_log/appConfig.json</test.app.template>
</systemPropertyVariables>
- <includes>
- <include>**/Test*.java</include>
- </includes>
- <excludes>
- <exclude>**/Test*$*.java</exclude>
- </excludes>
</configuration>
</plugin>
diff --git a/slider-funtest/src/main/groovy/org/apache/slider/funtest/abstracttests/AbstractTestBuildSetup.groovy b/slider-funtest/src/main/groovy/org/apache/slider/funtest/abstracttests/AbstractTestBuildSetup.groovy
index c42589a..e0b87f7 100644
--- a/slider-funtest/src/main/groovy/org/apache/slider/funtest/abstracttests/AbstractTestBuildSetup.groovy
+++ b/slider-funtest/src/main/groovy/org/apache/slider/funtest/abstracttests/AbstractTestBuildSetup.groovy
@@ -143,7 +143,6 @@
@Test
public void testConfHasDefaultFS() throws Throwable {
Configuration conf = loadSliderConf()
- assumeBoolOption(conf, KEY_SLIDER_FUNTESTS_ENABLED, true)
String fs = conf.get("fs.defaultFS")
log.info("Test Filesystem $fs")
assert fs != null
@@ -152,9 +151,7 @@
@Test
public void testConfHasRM() throws Throwable {
-
Configuration conf = loadSliderConf()
- assumeBoolOption(conf, KEY_SLIDER_FUNTESTS_ENABLED, true)
String val = conf.get(YarnConfiguration.RM_ADDRESS)
log.info("$YarnConfiguration.RM_ADDRESS = $val")
assert val != YarnConfiguration.DEFAULT_RM_ADDRESS
diff --git a/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/AgentCommandTestBase.groovy b/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/AgentCommandTestBase.groovy
index 26ae2bb..0a0ac16 100644
--- a/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/AgentCommandTestBase.groovy
+++ b/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/AgentCommandTestBase.groovy
@@ -23,10 +23,6 @@
import org.apache.slider.common.SliderExitCodes
import org.apache.slider.common.params.Arguments
import org.apache.slider.common.params.SliderActions
-import org.apache.slider.funtest.framework.AgentUploads
-import org.apache.slider.funtest.framework.CommandTestBase
-import org.apache.slider.funtest.framework.FuntestProperties
-import org.apache.slider.funtest.framework.SliderShell
import org.apache.tools.zip.ZipEntry
import org.apache.tools.zip.ZipOutputStream
import org.junit.Before
@@ -65,7 +61,6 @@
public TemporaryFolder folder = new TemporaryFolder();
public static void assumeAgentTestsEnabled() {
- assumeFunctionalTestsEnabled()
assume(AGENTTESTS_ENABLED, "Agent tests disabled")
}
@@ -158,31 +153,6 @@
return null;
}
- public static boolean isApplicationInState(String text, String applicationName) {
- boolean exists = false
- SliderShell shell = slider(EXIT_SUCCESS,
- [
- ACTION_LIST,
- applicationName])
- for (String str in shell.out) {
- if (str.contains(text)) {
- exists = true
- }
- }
-
- return exists
- }
-
- protected void ensureApplicationIsUp(String clusterName) {
- repeatUntilTrue(this.&isApplicationUp, 15, 1000 * 3, ['arg1': clusterName],
- true, 'Application did not start, aborting test.')
- }
-
- boolean isApplicationUp(Map<String, String> args) {
- String applicationName = args['arg1'];
- return isApplicationInState("RUNNING", applicationName);
- }
-
public static void addDir(File dirObj, ZipOutputStream zipFile, String prefix) {
dirObj.eachFile() { file ->
if (file.directory) {
@@ -196,23 +166,6 @@
}
}
- protected void repeatUntilTrue(Closure c, int maxAttempts, int sleepDur, Map args,
- boolean failIfUnsuccessful = false, String message = "") {
- int attemptCount = 0
- while (attemptCount < maxAttempts) {
- if (c(args)) {
- break
- };
- attemptCount++;
-
- if (failIfUnsuccessful) {
- assert attemptCount != maxAttempts, message
- }
-
- sleep(sleepDur)
- }
- }
-
protected void cleanup(String applicationName) throws Throwable {
if (setup_failed) {
// cleanup probably won't work if setup failed
diff --git a/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy b/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy
index 08d352a..278bd2b 100644
--- a/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy
+++ b/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy
@@ -66,8 +66,6 @@
public static final int SLIDER_TEST_TIMEOUT
- public static final boolean FUNTESTS_ENABLED
-
public static final String YARN_RAM_REQUEST
@@ -83,8 +81,6 @@
SLIDER_TEST_TIMEOUT = getTimeOptionMillis(SLIDER_CONFIG,
KEY_TEST_TIMEOUT,
1000 * DEFAULT_TEST_TIMEOUT_SECONDS)
- FUNTESTS_ENABLED =
- SLIDER_CONFIG.getBoolean(KEY_SLIDER_FUNTESTS_ENABLED, true)
YARN_RAM_REQUEST = SLIDER_CONFIG.get(
KEY_TEST_YARN_RAM_REQUEST,
@@ -215,6 +211,20 @@
])
}
+
+ static SliderShell freeze(
+ int exitCode,
+ String name,
+ Collection<String> args) {
+ slider(exitCode, [ACTION_FREEZE, name] + args)
+ }
+
+ /**
+ * Freeze cluster: no exit code checking
+ * @param name
+ * @param args
+ * @return
+ */
static SliderShell freeze(String name, Collection<String> args) {
slider([ACTION_FREEZE, name] + args)
}
@@ -292,7 +302,7 @@
}
static SliderShell thaw(String name, Collection<String> args) {
- slider([ACTION_THAW, name] + args)
+ slider(0, [ACTION_THAW, name] + args)
}
static SliderShell registry(int result, Collection<String> commands) {
@@ -327,9 +337,7 @@
* @param cluster
*/
static void setupCluster(String cluster) {
- if (FUNTESTS_ENABLED) {
- ensureClusterDestroyed(cluster)
- }
+ ensureClusterDestroyed(cluster)
}
/**
@@ -338,9 +346,7 @@
* @param name cluster name
*/
static void teardown(String name) {
- if (FUNTESTS_ENABLED) {
- freezeForce(name)
- }
+ freezeForce(name)
}
/**
@@ -508,11 +514,46 @@
return status
}
- /**
- * if tests are not enabled: skip them
- */
- public static void assumeFunctionalTestsEnabled() {
- assume(FUNTESTS_ENABLED, "Functional tests disabled")
+ protected void ensureApplicationIsUp(String clusterName) {
+ repeatUntilTrue(this.&isApplicationUp, 15, 1000 * 3, ['arg1': clusterName],
+ true, 'Application did not start, aborting test.')
+ }
+
+ protected boolean isApplicationUp(Map<String, String> args) {
+ String applicationName = args['arg1'];
+ return isApplicationInState("RUNNING", applicationName);
+ }
+
+ public static boolean isApplicationInState(String text, String applicationName) {
+ boolean exists = false
+ SliderShell shell = slider(0,
+ [
+ ACTION_LIST,
+ applicationName])
+ for (String str in shell.out) {
+ if (str.contains(text)) {
+ exists = true
+ }
+ }
+
+ return exists
+ }
+
+ protected void repeatUntilTrue(Closure c, int maxAttempts, int sleepDur, Map args,
+ boolean failIfUnsuccessful = false, String message = "") {
+ int attemptCount = 0
+ while (attemptCount < maxAttempts) {
+ if (c(args)) {
+ break
+ };
+ attemptCount++;
+
+ if (failIfUnsuccessful) {
+ assert attemptCount != maxAttempts, message
+ }
+
+ sleep(sleepDur)
+ }
}
}
diff --git a/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/FuntestProperties.groovy b/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/FuntestProperties.groovy
index 9b63c22..1096dfa 100644
--- a/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/FuntestProperties.groovy
+++ b/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/FuntestProperties.groovy
@@ -40,7 +40,6 @@
String KEY_SLIDER_TEST_NUM_WORKERS = "slider.test.cluster.size"
int DEFAULT_SLIDER_NUM_WORKERS = 1
- String KEY_SLIDER_TEST_ZK_HOSTS = "slider.test.zkhosts";
String DEFAULT_SLIDER_ZK_HOSTS = "localhost:2181";
/**
@@ -49,8 +48,6 @@
String KEY_AM_RESTART_SLEEP_TIME = "slider.test.am.restart.time"
int DEFAULT_AM_RESTART_SLEEP_TIME = 30000
- String KEY_SLIDER_FUNTESTS_ENABLED = "slider.funtest.enabled"
-
String CLIENT_CONFIG_FILENAME = SliderKeys.CLIENT_RESOURCE
String ENV_CONF_DIR = "SLIDER_CONF_DIR"
diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/basic/TestBuildSetup.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/basic/BuildSetupIT.groovy
similarity index 95%
rename from slider-funtest/src/test/groovy/org/apache/slider/funtest/basic/TestBuildSetup.groovy
rename to slider-funtest/src/test/groovy/org/apache/slider/funtest/basic/BuildSetupIT.groovy
index b6f9a12..c8fdb97 100644
--- a/slider-funtest/src/test/groovy/org/apache/slider/funtest/basic/TestBuildSetup.groovy
+++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/basic/BuildSetupIT.groovy
@@ -32,5 +32,5 @@
*/
@CompileStatic
@Slf4j
-class TestBuildSetup extends AbstractTestBuildSetup {
+class BuildSetupIT extends AbstractTestBuildSetup {
}
diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/basic/TestClusterConnectivity.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/basic/ClusterConnectivityIT.groovy
similarity index 95%
rename from slider-funtest/src/test/groovy/org/apache/slider/funtest/basic/TestClusterConnectivity.groovy
rename to slider-funtest/src/test/groovy/org/apache/slider/funtest/basic/ClusterConnectivityIT.groovy
index b9d768a..9b8fe6f 100644
--- a/slider-funtest/src/test/groovy/org/apache/slider/funtest/basic/TestClusterConnectivity.groovy
+++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/basic/ClusterConnectivityIT.groovy
@@ -34,16 +34,11 @@
* Test basic connectivity with the target cluster, including
* HDFS, YARN and ZK
*/
-class TestClusterConnectivity extends CommandTestBase {
+class ClusterConnectivityIT extends CommandTestBase {
public static final int CONNECT_TIMEOUT = 2000
- @BeforeClass
- public static void setup() {
- assumeFunctionalTestsEnabled()
- }
-
@Test
public void testFileSystemUp() throws Throwable {
diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/basic/TestSignCorrection.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/basic/SignCorrectionIT.groovy
similarity index 97%
rename from slider-funtest/src/test/groovy/org/apache/slider/funtest/basic/TestSignCorrection.groovy
rename to slider-funtest/src/test/groovy/org/apache/slider/funtest/basic/SignCorrectionIT.groovy
index 7feb11d..eee75a8 100644
--- a/slider-funtest/src/test/groovy/org/apache/slider/funtest/basic/TestSignCorrection.groovy
+++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/basic/SignCorrectionIT.groovy
@@ -26,7 +26,7 @@
* This just verifies the two's complement sign correction that will
* be applied after the return code is picked up from the shell
*/
-class TestSignCorrection {
+class SignCorrectionIT {
@Test
public void test255ToMinus1() throws Throwable {
diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/commands/TestListCommand.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/commands/ListCommandIT.groovy
similarity index 87%
rename from slider-funtest/src/test/groovy/org/apache/slider/funtest/commands/TestListCommand.groovy
rename to slider-funtest/src/test/groovy/org/apache/slider/funtest/commands/ListCommandIT.groovy
index ce7b497..20bac88 100644
--- a/slider-funtest/src/test/groovy/org/apache/slider/funtest/commands/TestListCommand.groovy
+++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/commands/ListCommandIT.groovy
@@ -26,13 +26,8 @@
@CompileStatic
@Slf4j
-public class TestListCommand extends CommandTestBase {
+public class ListCommandIT extends CommandTestBase {
- @BeforeClass
- public static void prepareCluster() {
- assumeFunctionalTestsEnabled();
- }
-
@Test
public void testListAll() throws Throwable {
assertSuccess(list(null))
diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/commands/TestSimpleCommands.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/commands/SimpleCommandsIT.groovy
similarity index 95%
rename from slider-funtest/src/test/groovy/org/apache/slider/funtest/commands/TestSimpleCommands.groovy
rename to slider-funtest/src/test/groovy/org/apache/slider/funtest/commands/SimpleCommandsIT.groovy
index 2d00130..bf742c9 100644
--- a/slider-funtest/src/test/groovy/org/apache/slider/funtest/commands/TestSimpleCommands.groovy
+++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/commands/SimpleCommandsIT.groovy
@@ -28,7 +28,7 @@
@CompileStatic
@Slf4j
-public class TestSimpleCommands extends CommandTestBase {
+public class SimpleCommandsIT extends CommandTestBase {
@Test
public void testVersion() throws Throwable {
diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/commands/TestUnknownClusterOperations.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/commands/UnknownClusterOperationsIT.groovy
similarity index 94%
rename from slider-funtest/src/test/groovy/org/apache/slider/funtest/commands/TestUnknownClusterOperations.groovy
rename to slider-funtest/src/test/groovy/org/apache/slider/funtest/commands/UnknownClusterOperationsIT.groovy
index 7791c3c..39ae4dd 100644
--- a/slider-funtest/src/test/groovy/org/apache/slider/funtest/commands/TestUnknownClusterOperations.groovy
+++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/commands/UnknownClusterOperationsIT.groovy
@@ -35,15 +35,10 @@
@CompileStatic
@Slf4j
@org.junit.experimental.categories.Category(FunctionalTests)
-public class TestUnknownClusterOperations extends CommandTestBase {
+public class UnknownClusterOperationsIT extends CommandTestBase {
public static final String UNKNOWN = "unknown_cluster"
- @BeforeClass
- public static void prepareCluster() {
- assumeFunctionalTestsEnabled();
- }
-
@Test
public void testFreezeUnknownCluster() throws Throwable {
SliderShell shell = freeze(UNKNOWN)
diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/TestAgentClusterLifecycle.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentClusterLifecycleIT.groovy
similarity index 94%
rename from slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/TestAgentClusterLifecycle.groovy
rename to slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentClusterLifecycleIT.groovy
index 0d643ca..6b0f2bd 100644
--- a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/TestAgentClusterLifecycle.groovy
+++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentClusterLifecycleIT.groovy
@@ -36,7 +36,7 @@
@CompileStatic
@Slf4j
-public class TestAgentClusterLifecycle extends AgentCommandTestBase
+public class AgentClusterLifecycleIT extends AgentCommandTestBase
implements FuntestProperties, Arguments, SliderExitCodes, SliderActions {
@@ -126,7 +126,7 @@
log.info("Connected via Client {}", sliderClient.toString())
//freeze
- freeze(CLUSTER, [
+ freeze(0, CLUSTER, [
ARG_WAIT, Integer.toString(FREEZE_WAIT_TIME),
ARG_MESSAGE, "freeze-in-test-cluster-lifecycle"
])
@@ -145,7 +145,7 @@
exists(0, CLUSTER)
describe " >>> Cluster is now thawed."
- freeze(CLUSTER,
+ freeze(0, CLUSTER,
[
ARG_FORCE,
ARG_WAIT, Integer.toString(FREEZE_WAIT_TIME),
@@ -178,7 +178,12 @@
StatusKeys.INFO_CONTAINERS_AM_RESTART)
assert restarted != null
assert Integer.parseInt(restarted) == 0
- freeze(CLUSTER)
+ freeze(0, CLUSTER,
+ [
+ ARG_FORCE,
+ ARG_WAIT, Integer.toString(FREEZE_WAIT_TIME),
+ ARG_MESSAGE, "final-shutdown"
+ ])
destroy(0, CLUSTER)
diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/TestAgentFailures2.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailures2IT.groovy
similarity index 97%
rename from slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/TestAgentFailures2.groovy
rename to slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailures2IT.groovy
index 7804042..0ba48ba 100644
--- a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/TestAgentFailures2.groovy
+++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailures2IT.groovy
@@ -31,7 +31,7 @@
@CompileStatic
@Slf4j
-public class TestAgentFailures2 extends AgentCommandTestBase
+public class AgentFailures2IT extends AgentCommandTestBase
implements FuntestProperties, Arguments, SliderExitCodes, SliderActions {
private static String COMMAND_LOGGER = "COMMAND_LOGGER"
diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/TestAgentFailures.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailuresIT.groovy
similarity index 97%
rename from slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/TestAgentFailures.groovy
rename to slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailuresIT.groovy
index ea58d5f..a51c769 100644
--- a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/TestAgentFailures.groovy
+++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailuresIT.groovy
@@ -31,7 +31,7 @@
@CompileStatic
@Slf4j
-public class TestAgentFailures extends AgentCommandTestBase
+public class AgentFailuresIT extends AgentCommandTestBase
implements FuntestProperties, Arguments, SliderExitCodes, SliderActions {
private static String COMMAND_LOGGER = "COMMAND_LOGGER"
diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/TestAppsThroughAgent.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AppsThroughAgentIT.groovy
similarity index 97%
rename from slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/TestAppsThroughAgent.groovy
rename to slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AppsThroughAgentIT.groovy
index 6b0f678..00a876a 100644
--- a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/TestAppsThroughAgent.groovy
+++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AppsThroughAgentIT.groovy
@@ -31,7 +31,7 @@
@CompileStatic
@Slf4j
-public class TestAppsThroughAgent extends AgentCommandTestBase
+public class AppsThroughAgentIT extends AgentCommandTestBase
implements FuntestProperties, Arguments, SliderExitCodes, SliderActions {
private static String COMMAND_LOGGER = "COMMAND_LOGGER"
diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/TestClusterBuildDestroy.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/ClusterBuildDestroyIT.groovy
similarity index 97%
rename from slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/TestClusterBuildDestroy.groovy
rename to slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/ClusterBuildDestroyIT.groovy
index ead1601..f8caac5 100644
--- a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/TestClusterBuildDestroy.groovy
+++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/ClusterBuildDestroyIT.groovy
@@ -34,7 +34,7 @@
@CompileStatic
@Slf4j
-public class TestClusterBuildDestroy extends AgentCommandTestBase
+public class ClusterBuildDestroyIT extends AgentCommandTestBase
implements FuntestProperties, Arguments, SliderExitCodes, SliderActions {
diff --git a/slider-install/pom.xml b/slider-install/pom.xml
deleted file mode 100644
index b08895f..0000000
--- a/slider-install/pom.xml
+++ /dev/null
@@ -1,111 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
- <modelVersion>4.0.0</modelVersion>
- <artifactId>slider-install</artifactId>
- <name>Slider Install</name>
- <packaging>pom</packaging>
- <description>
-
- Builds any RPMs and other install packaging for Slider.
- This is downstream of the slider assembly and the app packages, so
- that anything from these can be installed in the RPM.
- </description>
- <parent>
- <groupId>org.apache.slider</groupId>
- <artifactId>slider</artifactId>
- <version>0.40</version>
- </parent>
-
- <properties>
- <basedir>/usr/local/slider</basedir>
- <confdir>${basedir}/conf</confdir>
- <bindir>${basedir}/bin</bindir>
- </properties>
-
- <build>
- <plugins>
- <!--read in a build.properties file if defined-->
- <plugin>
- <groupId>org.codehaus.mojo</groupId>
- <artifactId>properties-maven-plugin</artifactId>
- <version>${maven.properties.version}</version>
- <executions>
- <execution>
- <phase>initialize</phase>
- <goals>
- <goal>read-project-properties</goal>
- </goals>
- <configuration>
- <quiet>true</quiet>
- <files>
- <file>build.properties</file>
- <file>../build.properties</file>
- </files>
- </configuration>
- </execution>
- </executions>
- </plugin>
-
- </plugins>
-
-
- </build>
-
- <reporting>
- <plugins>
-
-
-
- </plugins>
- </reporting>
-
- <dependencies>
-
- <dependency>
- <groupId>org.apache.slider</groupId>
- <artifactId>slider-core</artifactId>
- <version>${project.version}</version>
- </dependency>
-
- <!--
- needed to order the build and ensure the agent tar is found
- the test scope ensures that it isn't copied into the lib dir
- -->
- <dependency>
- <groupId>org.apache.slider</groupId>
- <artifactId>slider-agent</artifactId>
- <version>${project.version}</version>
- <scope>test</scope>
- <type>tar.gz</type>
- </dependency>
-
- <dependency>
- <groupId>com.beust</groupId>
- <artifactId>jcommander</artifactId>
- </dependency>
-
-
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-client</artifactId>
- </dependency>
-
- </dependencies>
-
-
-</project>
diff --git a/slider-providers/accumulo/accumulo-funtests/pom.xml b/slider-providers/accumulo/accumulo-funtests/pom.xml
index 1b25b33..14388f4 100644
--- a/slider-providers/accumulo/accumulo-funtests/pom.xml
+++ b/slider-providers/accumulo/accumulo-funtests/pom.xml
@@ -27,7 +27,7 @@
<parent>
<groupId>org.apache.slider</groupId>
<artifactId>slider</artifactId>
- <version>0.40</version>
+ <version>0.50.0-incubating</version>
<relativePath>../../../</relativePath>
</parent>
@@ -59,14 +59,6 @@
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
- <version>${maven-compiler-plugin.version}</version>
- <configuration>
- <compilerId>groovy-eclipse-compiler</compilerId>
- <!-- set verbose to be true if you want lots of uninteresting messages -->
- <!-- <verbose>true</verbose> -->
- <source>${project.java.src.version}</source>
- <target>${project.java.src.version}</target>
- </configuration>
<dependencies>
<dependency>
<groupId>org.codehaus.groovy</groupId>
@@ -85,8 +77,17 @@
<!-- functional test -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-surefire-plugin</artifactId>
- <version>${maven-surefire-plugin.version}</version>
+ <artifactId>maven-failsafe-plugin</artifactId>
+ <version>${maven-failsafe-plugin.version}</version>
+ <executions>
+ <execution>
+ <id>run-integration-tests</id>
+ <goals>
+ <goal>integration-test</goal>
+ <goal>verify</goal>
+ </goals>
+ </execution>
+ </executions>
<configuration>
<!--mvn process fork options-->
<reuseForks>${test.reuseForks}</reuseForks>
@@ -109,12 +110,6 @@
<slider.conf.dir>${slider.conf.dir}</slider.conf.dir>
<slider.bin.dir>../../../slider-assembly/target/slider-${project.version}-all/slider-${project.version}</slider.bin.dir>
</systemPropertyVariables>
- <includes>
- <include>**/Test*.java</include>
- </includes>
- <excludes>
- <exclude>**/Test*$*.java</exclude>
- </excludes>
</configuration>
</plugin>
@@ -148,6 +143,16 @@
<dependency>
<groupId>org.apache.slider</groupId>
<artifactId>slider-core</artifactId>
+ </dependency>
+
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.slider</groupId>
+ <artifactId>slider-core</artifactId>
<version>${project.version}</version>
<type>test-jar</type>
<scope>test</scope>
@@ -212,9 +217,22 @@
<dependency>
<groupId>org.apache.accumulo</groupId>
+ <artifactId>accumulo-fate</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.accumulo</groupId>
<artifactId>accumulo-test</artifactId>
<scope>test</scope>
</dependency>
+
+ <dependency>
+ <groupId>org.apache.thrift</groupId>
+ <artifactId>libthrift</artifactId>
+ <version>0.9.0</version>
+ <scope>test</scope>
+ </dependency>
</dependencies>
diff --git a/slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/TestStub.groovy b/slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/TestStub.groovy
deleted file mode 100644
index 3d9abb7..0000000
--- a/slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/TestStub.groovy
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.slider.providers.accumulo
-
-import org.junit.Test
-
-/**
- * this is here to ensure there is always a test
- */
-class TestStub {
-
- @Test
- public void testStubTest() throws Throwable {
-
- }
-}
diff --git a/slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/TestAccumuloBuildSetup.groovy b/slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/AccumuloBuildSetupIT.groovy
similarity index 91%
rename from slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/TestAccumuloBuildSetup.groovy
rename to slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/AccumuloBuildSetupIT.groovy
index 61366da..109bce9 100644
--- a/slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/TestAccumuloBuildSetup.groovy
+++ b/slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/AccumuloBuildSetupIT.groovy
@@ -22,12 +22,11 @@
import org.apache.slider.funtest.abstracttests.AbstractTestBuildSetup
import org.junit.Test
-class TestAccumuloBuildSetup extends AbstractTestBuildSetup {
+class AccumuloBuildSetupIT extends AbstractTestBuildSetup {
@Test
public void testAccumuloBuildsHavePathsDefined() throws Throwable {
Configuration conf = loadSliderConf();
- assumeBoolOption(conf, KEY_SLIDER_FUNTESTS_ENABLED, true)
assumeBoolOption(conf, KEY_TEST_ACCUMULO_ENABLED, true)
diff --git a/slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/TestAccumuloCI.groovy b/slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/AccumuloCIIT.groovy
similarity index 91%
rename from slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/TestAccumuloCI.groovy
rename to slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/AccumuloCIIT.groovy
index 5573dd4..4ec5ff1 100644
--- a/slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/TestAccumuloCI.groovy
+++ b/slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/AccumuloCIIT.groovy
@@ -16,7 +16,6 @@
*/
package org.apache.slider.providers.accumulo.funtest
-import groovy.transform.CompileStatic
import groovy.util.logging.Slf4j
import org.apache.accumulo.core.client.Connector
@@ -34,12 +33,8 @@
import org.apache.slider.funtest.framework.FuntestProperties
import org.apache.slider.funtest.framework.PortAssignments
-/**
- *
- */
-@CompileStatic
@Slf4j
-class TestAccumuloCI extends TestFunctionalAccumuloCluster {
+class AccumuloCIIT extends FunctionalAccumuloClusterIT {
@Override
String getClusterName() {
@@ -67,7 +62,6 @@
String zookeepers = SLIDER_CONFIG.get(SliderXmlConfKeys.REGISTRY_ZK_QUORUM,
FuntestProperties.DEFAULT_SLIDER_ZK_HOSTS)
ZooKeeperInstance inst = new ZooKeeperInstance(currentUser + "-" + clustername, zookeepers)
- PasswordToken passwd = new PasswordToken(getPassword())
Connector conn = inst.getConnector("root", new PasswordToken(getPassword()))
// Create the test table with some split points
@@ -83,7 +77,7 @@
String[] ciOpts = ["-i", inst.getInstanceName(),
"-z", zookeepers, "-u", "root",
"-p", getPassword(), "--table", tableName,
- "--num", Integer.toString(1000 * 1000 * 15 * getNumTservers()),
+ "--num", Integer.toString(1000 * 1000 * 4 * getNumTservers()),
"--batchMemory", "100000000",
"--batchLatency", "600000",
"--batchThreads", "1"]
@@ -95,7 +89,7 @@
Path verifyOutput = new Path("/user/" + currentUser + "/.slider/cluster/" + clustername + "/verify-output")
assert !clusterFS.exists(verifyOutput)
- YarnConfiguration verifyConf = new YarnConfiguration(CommandTestBase.SLIDER_CONFIG);
+ YarnConfiguration verifyConf = new YarnConfiguration(SLIDER_CONFIG);
// Try to load the necessary classes for the Mappers to find them
if (loadClassesForMapReduce(verifyConf)) {
diff --git a/slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/AccumuloCommandTestBase.groovy b/slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/AccumuloCommandTestBase.groovy
index 1b5d8bf..f050793 100644
--- a/slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/AccumuloCommandTestBase.groovy
+++ b/slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/AccumuloCommandTestBase.groovy
@@ -18,6 +18,13 @@
package org.apache.slider.providers.accumulo.funtest
+import org.apache.hadoop.security.UserGroupInformation
+import org.apache.slider.common.SliderExitCodes
+import org.apache.slider.common.params.SliderActions
+import org.apache.slider.funtest.framework.FileUploader
+import org.apache.slider.providers.accumulo.AccumuloClientProvider
+import org.junit.BeforeClass
+
import static SliderXMLConfKeysForTesting.KEY_TEST_ACCUMULO_APPCONF
import static SliderXMLConfKeysForTesting.KEY_TEST_ACCUMULO_TAR
import static org.apache.slider.api.ResourceKeys.YARN_MEMORY
@@ -47,10 +54,14 @@
/**
* Anything specific to accumulo tests
*/
-abstract class AccumuloCommandTestBase extends CommandTestBase {
+abstract class AccumuloCommandTestBase extends CommandTestBase
+ implements SliderExitCodes, SliderActions {
public static final int ACCUMULO_LAUNCH_WAIT_TIME
public static final boolean ACCUMULO_TESTS_ENABLED
+ public static final FileUploader uploader
+ public Path ACCUMULO_TAR
+ public Path ACCUMULO_CONF
static {
ACCUMULO_LAUNCH_WAIT_TIME = getTimeOptionMillis(SLIDER_CONFIG,
@@ -58,17 +69,16 @@
1000 * DEFAULT_ACCUMULO_LAUNCH_TIME_SECONDS)
ACCUMULO_TESTS_ENABLED =
SLIDER_CONFIG.getBoolean(KEY_TEST_ACCUMULO_ENABLED, false)
+ uploader = new FileUploader(SLIDER_CONFIG, UserGroupInformation.currentUser)
}
public static void assumeAccumuloTestsEnabled() {
- assumeFunctionalTestsEnabled()
assume(ACCUMULO_TESTS_ENABLED, "Accumulo tests disabled")
}
- @Before
- public void verifyPreconditions() {
-
+ @BeforeClass
+ public static void verifyPreconditions() {
//if tests are not enabled: skip tests
assumeAccumuloTestsEnabled()
// but if they are -fail if the values are missing
@@ -76,6 +86,29 @@
getRequiredConfOption(SLIDER_CONFIG, OPTION_HADOOP_HOME)
}
+ @BeforeClass
+ public static void extendClasspath() {
+ addExtraJar(AccumuloClientProvider)
+ }
+
+ @Before
+ public void uploadFiles() {
+ File tar = new File(getRequiredConfOption(SLIDER_CONFIG,
+ KEY_TEST_ACCUMULO_TAR))
+ File conf = new File(getRequiredConfOption(SLIDER_CONFIG,
+ KEY_TEST_ACCUMULO_APPCONF))
+
+ //create the home dir or fail
+ Path home = uploader.mkHomeDir()
+
+ ACCUMULO_TAR = new Path(home, tar.getName())
+ ACCUMULO_CONF = new Path(home, "accumulo-conf")
+
+ // Upload the local accumulo tarball and conf directory to hdfs
+ uploader.copyIfOutOfDate(tar, ACCUMULO_TAR, false)
+ uploader.copyIfOutOfDate(conf, ACCUMULO_CONF, false)
+ }
+
/**
* Create an accumulo cluster
*
@@ -102,12 +135,10 @@
clusterOps[OPTION_HADOOP_HOME] = getRequiredConfOption(
SLIDER_CONFIG,
OPTION_HADOOP_HOME)
- argsList << Arguments.ARG_IMAGE <<
- getRequiredConfOption(SLIDER_CONFIG, KEY_TEST_ACCUMULO_TAR)
+ argsList << Arguments.ARG_IMAGE << ACCUMULO_TAR
- argsList << Arguments.ARG_CONFDIR <<
- getRequiredConfOption(SLIDER_CONFIG, KEY_TEST_ACCUMULO_APPCONF)
-
+ argsList << Arguments.ARG_CONFDIR << ACCUMULO_CONF
+
argsList << Arguments.ARG_OPTION << AccumuloKeys.OPTION_ACCUMULO_PASSWORD << password
argsList << ARG_RES_COMP_OPT << ROLE_MASTER <<
@@ -125,7 +156,7 @@
blockUntilRunning,
clusterOps)
}
-
+
public boolean loadClassesForMapReduce(Configuration conf) {
String[] neededClasses = [AccumuloInputFormat.class.getName(), TException.class.getName(), ZooStore.class.getName(), Tracer.class.getName()]
String[] neededJars = ["accumulo-core.jar", "libthrift.jar", "accumulo-fate.jar", "accumulo-trace.jar"]
diff --git a/slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/TestFunctionalAccumuloCluster.groovy b/slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/FunctionalAccumuloClusterIT.groovy
similarity index 96%
rename from slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/TestFunctionalAccumuloCluster.groovy
rename to slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/FunctionalAccumuloClusterIT.groovy
index 06fe21c..ca7cc65 100644
--- a/slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/TestFunctionalAccumuloCluster.groovy
+++ b/slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/FunctionalAccumuloClusterIT.groovy
@@ -18,7 +18,6 @@
import static org.apache.slider.providers.accumulo.AccumuloConfigFileOptions.*
import static org.apache.slider.providers.accumulo.AccumuloKeys.*
-import groovy.transform.CompileStatic
import groovy.util.logging.Slf4j
import org.apache.slider.common.SliderExitCodes
@@ -33,9 +32,8 @@
/**
*
*/
-@CompileStatic
@Slf4j
-class TestFunctionalAccumuloCluster extends AccumuloCommandTestBase
+class FunctionalAccumuloClusterIT extends AccumuloCommandTestBase
implements FuntestProperties, Arguments, SliderExitCodes {
@@ -108,9 +106,10 @@
extraArgs,
true,
clusterOps,
- "256",
+ "128",
getPassword()
)
+ ensureApplicationIsUp(getClusterName())
//get a slider client against the cluster
SliderClient sliderClient = bondToCluster(SLIDER_CONFIG, getClusterName())
diff --git a/slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/TestFunctionalAccumuloM1T1GC1Mon1.groovy b/slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/FunctionalAccumuloM1T1GC1Mon1IT.groovy
similarity index 92%
rename from slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/TestFunctionalAccumuloM1T1GC1Mon1.groovy
rename to slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/FunctionalAccumuloM1T1GC1Mon1IT.groovy
index 4f07b40..33910d4 100644
--- a/slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/TestFunctionalAccumuloM1T1GC1Mon1.groovy
+++ b/slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/FunctionalAccumuloM1T1GC1Mon1IT.groovy
@@ -18,15 +18,13 @@
package org.apache.slider.providers.accumulo.funtest
-import groovy.transform.CompileStatic
import groovy.util.logging.Slf4j
import org.apache.slider.api.ClusterDescription
import org.apache.slider.common.params.Arguments
import org.apache.slider.common.params.SliderActions
-@CompileStatic
@Slf4j
-public class TestFunctionalAccumuloM1T1GC1Mon1 extends TestFunctionalAccumuloCluster {
+public class FunctionalAccumuloM1T1GC1Mon1IT extends FunctionalAccumuloClusterIT {
@Override
public String getClusterName() {
diff --git a/slider-funtest/src/main/java/org/apache/slider/funtest/accumulo/StubToForceGroovySrcToCompile.java b/slider-providers/accumulo/accumulo-funtests/src/test/java/org/apache/slider/providers/accumulo/funtest/StubToForceGroovyTestsToCompile.java
similarity index 88%
rename from slider-funtest/src/main/java/org/apache/slider/funtest/accumulo/StubToForceGroovySrcToCompile.java
rename to slider-providers/accumulo/accumulo-funtests/src/test/java/org/apache/slider/providers/accumulo/funtest/StubToForceGroovyTestsToCompile.java
index eefccbb..1a948a2 100644
--- a/slider-funtest/src/main/java/org/apache/slider/funtest/accumulo/StubToForceGroovySrcToCompile.java
+++ b/slider-providers/accumulo/accumulo-funtests/src/test/java/org/apache/slider/providers/accumulo/funtest/StubToForceGroovyTestsToCompile.java
@@ -16,7 +16,7 @@
* limitations under the License.
*/
-package org.apache.slider.funtest.accumulo;
+package org.apache.slider.providers.accumulo.funtest;
-class StubToForceGroovySrcToCompile {
+class StubToForceGroovyTestsToCompile {
}
diff --git a/slider-providers/accumulo/slider-accumulo-provider/pom.xml b/slider-providers/accumulo/slider-accumulo-provider/pom.xml
index cabea00..bb5434a 100644
--- a/slider-providers/accumulo/slider-accumulo-provider/pom.xml
+++ b/slider-providers/accumulo/slider-accumulo-provider/pom.xml
@@ -28,7 +28,7 @@
<parent>
<groupId>org.apache.slider</groupId>
<artifactId>slider</artifactId>
- <version>0.40</version>
+ <version>0.50.0-incubating</version>
<relativePath>../../../</relativePath>
</parent>
@@ -68,14 +68,6 @@
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
- <version>${maven-compiler-plugin.version}</version>
- <configuration>
- <compilerId>groovy-eclipse-compiler</compilerId>
- <!-- set verbose to be true if you want lots of uninteresting messages -->
- <!-- <verbose>true</verbose> -->
- <source>${project.java.src.version}</source>
- <target>${project.java.src.version}</target>
- </configuration>
<dependencies>
<dependency>
<groupId>org.codehaus.groovy</groupId>
diff --git a/slider-providers/accumulo/slider-accumulo-provider/src/main/java/org/apache/slider/providers/accumulo/AccumuloClientProvider.java b/slider-providers/accumulo/slider-accumulo-provider/src/main/java/org/apache/slider/providers/accumulo/AccumuloClientProvider.java
index db99360..7f99573 100644
--- a/slider-providers/accumulo/slider-accumulo-provider/src/main/java/org/apache/slider/providers/accumulo/AccumuloClientProvider.java
+++ b/slider-providers/accumulo/slider-accumulo-provider/src/main/java/org/apache/slider/providers/accumulo/AccumuloClientProvider.java
@@ -23,6 +23,7 @@
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.slider.api.InternalKeys;
import org.apache.slider.common.SliderKeys;
import org.apache.slider.common.SliderXmlConfKeys;
import org.apache.slider.api.OptionKeys;
@@ -135,7 +136,7 @@
propagateClientFSBinding(sitexml);
setDatabasePath(sitexml,
- globalInstanceOptions.getMandatoryOption(OptionKeys.INTERNAL_DATA_DIR_PATH));
+ globalInstanceOptions.getMandatoryOption(InternalKeys.INTERNAL_DATA_DIR_PATH));
String quorum =
diff --git a/slider-providers/accumulo/slider-accumulo-provider/src/main/java/org/apache/slider/providers/accumulo/AccumuloProviderService.java b/slider-providers/accumulo/slider-accumulo-provider/src/main/java/org/apache/slider/providers/accumulo/AccumuloProviderService.java
index c511efb..b8f4c00 100644
--- a/slider-providers/accumulo/slider-accumulo-provider/src/main/java/org/apache/slider/providers/accumulo/AccumuloProviderService.java
+++ b/slider-providers/accumulo/slider-accumulo-provider/src/main/java/org/apache/slider/providers/accumulo/AccumuloProviderService.java
@@ -26,6 +26,7 @@
import org.apache.hadoop.service.Service;
import org.apache.hadoop.yarn.api.ApplicationConstants;
import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.slider.api.InternalKeys;
import org.apache.slider.common.SliderKeys;
import org.apache.slider.api.ClusterDescription;
import org.apache.slider.api.OptionKeys;
@@ -161,7 +162,7 @@
//Add binaries
//now add the image if it was set
String imageURI = instanceDefinition.getInternalOperations()
- .get(OptionKeys.INTERNAL_APPLICATION_IMAGE_PATH);
+ .get(InternalKeys.INTERNAL_APPLICATION_IMAGE_PATH);
fileSystem.maybeAddImagePath(launcher.getLocalResources(), imageURI);
CommandLineBuilder commandLine = new CommandLineBuilder();
@@ -169,6 +170,8 @@
String heap = "-Xmx" + appComponent.getOption(RoleKeys.JVM_HEAP, DEFAULT_JVM_HEAP);
String opt = "ACCUMULO_OTHER_OPTS";
if (SliderUtils.isSet(heap)) {
+/* JDK7
+
switch (role) {
case AccumuloKeys.ROLE_MASTER:
opt = "ACCUMULO_MASTER_OPTS";
@@ -183,6 +186,16 @@
opt = "ACCUMULO_GC_OPTS";
break;
}
+*/
+ if (AccumuloKeys.ROLE_MASTER.equals(role)) {
+ opt = "ACCUMULO_MASTER_OPTS";
+ } else if (AccumuloKeys.ROLE_TABLET.equals(role)) {
+ opt = "ACCUMULO_TSERVER_OPTS";
+ } else if (AccumuloKeys.ROLE_MONITOR.equals(role)) {
+ opt = "ACCUMULO_MONITOR_OPTS";
+ } else if (AccumuloKeys.ROLE_GARBAGE_COLLECTOR.equals(role)) {
+ opt = "ACCUMULO_GC_OPTS";
+ }
launcher.setEnv(opt, heap);
}
@@ -238,7 +251,7 @@
String accumuloScript = AccumuloClientProvider.buildScriptBinPath(instance);
- List<String> launchSequence = new ArrayList<>(8);
+ List<String> launchSequence = new ArrayList<String>(8);
launchSequence.add(0, accumuloScript);
Collections.addAll(launchSequence, commands);
return launchSequence;
@@ -333,11 +346,12 @@
//callback to AM to trigger cluster review is set up to happen after
//the init/verify action has succeeded
int delay = internalOperations.getGlobalOptions().getOptionInt(
- OptionKeys.INTERNAL_CONTAINER_STARTUP_DELAY,
- OptionKeys.DEFAULT_CONTAINER_STARTUP_DELAY);
+ InternalKeys.INTERNAL_CONTAINER_STARTUP_DELAY,
+ InternalKeys.DEFAULT_INTERNAL_CONTAINER_STARTUP_DELAY);
ProviderCompletedCallable completedCallable =
new ProviderCompletedCallable(execInProgress, null);
- Service notifier = new WorkflowCallbackService<>(
+ // JDK7
+ Service notifier = new WorkflowCallbackService(
"accumulo notifier",
completedCallable,
delay,
@@ -364,7 +378,7 @@
String dataDir = cd.getInternalOperations()
.getGlobalOptions()
.getMandatoryOption(
- OptionKeys.INTERNAL_DATA_DIR_PATH);
+ InternalKeys.INTERNAL_DATA_DIR_PATH);
Path accumuloInited = new Path(dataDir, INSTANCE_ID);
FileSystem fs2 = FileSystem.get(accumuloInited.toUri(), getConf());
return fs2.exists(accumuloInited);
@@ -388,7 +402,7 @@
@Override
public Map<String, String> buildProviderStatus() {
- Map<String,String> status = new HashMap<>();
+ Map<String,String> status = new HashMap<String, String>();
return status;
diff --git a/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/AccumuloTestBase.groovy b/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/AccumuloTestBase.groovy
index 1e2e27f..bf35207 100644
--- a/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/AccumuloTestBase.groovy
+++ b/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/AccumuloTestBase.groovy
@@ -195,7 +195,7 @@
String clustername, List<Map<String, Integer>> plan) {
int planCount = plan.size()
assert planCount > 0
- createMiniCluster(clustername, getConfiguration(),
+ createMiniCluster(clustername, configuration,
1,
true);
//now launch the cluster
diff --git a/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccCorrectInstanceName.groovy b/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccCorrectInstanceName.groovy
index 348ccd4..2333fdf 100644
--- a/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccCorrectInstanceName.groovy
+++ b/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccCorrectInstanceName.groovy
@@ -39,11 +39,11 @@
@Test
public void testAccM1T1GC1Mon1() throws Throwable {
- String clustername = "test_acc_m1t1gc1mon1"
int tablets = 1
int monitor = 1
int gc = 1
- createMiniCluster(clustername, getConfiguration(), 1, 1, 1, true, false)
+ String clustername = createMiniCluster( "",
+ configuration, 1, 1, 1, true, false)
describe(" Create an accumulo cluster");
//make sure that ZK is up and running at the binding string
diff --git a/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccFlexTablets133Mgr113.groovy b/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccFlexTablets133Mgr113.groovy
index 166c6c4..18c00f5 100644
--- a/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccFlexTablets133Mgr113.groovy
+++ b/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccFlexTablets133Mgr113.groovy
@@ -29,8 +29,7 @@
@Test
public void testAccFlexTablets133Mgr113() throws Throwable {
- ClusterDescription cd = flexAccClusterTestRun(
- "test_acc_flex_tablets133mgr113",
+ ClusterDescription cd = flexAccClusterTestRun(createClusterName(),
[
[
(AccumuloKeys.ROLE_MASTER) : 1,
diff --git a/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccFlexTablets1to3.groovy b/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccFlexTablets1to3.groovy
index b084cdc..2e8f3c9 100644
--- a/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccFlexTablets1to3.groovy
+++ b/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccFlexTablets1to3.groovy
@@ -44,7 +44,7 @@
(AccumuloKeys.ROLE_GARBAGE_COLLECTOR): 1]
ClusterDescription cd = flexAccClusterTestRun(
- "test_acc_flex_tablets1to3",
+ createClusterName(),
[plan1, plan2]
)
diff --git a/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccFreezeThaw.groovy b/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccFreezeThaw.groovy
index 143974f..6da00fb 100644
--- a/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccFreezeThaw.groovy
+++ b/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccFreezeThaw.groovy
@@ -34,11 +34,11 @@
@Test
public void testAccFreezeThaw() throws Throwable {
- String clustername = "test_acc_freeze_thaw"
int tablets = 1
int monitor = 1
int gc = 1
- createMiniCluster(clustername, configuration, 1, 1, 1, true, false)
+ String clustername = createMiniCluster("",
+ configuration, 1, 1, 1, true, false)
describe(" Create an accumulo cluster");
//make sure that ZK is up and running at the binding string
diff --git a/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccLiveHDFSArchive.groovy b/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccLiveHDFSArchive.groovy
index 2f744bb..8d5890c 100644
--- a/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccLiveHDFSArchive.groovy
+++ b/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccLiveHDFSArchive.groovy
@@ -36,11 +36,11 @@
@Test
public void testAccLiveHDFSArchive() throws Throwable {
- String clustername = "test_acc_live_hdfs_archive"
int tablets = 1
int monitor = 1
int gc = 1
- createMiniCluster(clustername, configuration, 1, 1, 1, true, true)
+ String clustername = createMiniCluster(
+ "", configuration, 1, 1, 1, true, true)
describe(" Create an accumulo cluster from an archive");
enableTestRunAgainstUploadedArchive();
diff --git a/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccLiveLocalArchive.groovy b/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccLiveLocalArchive.groovy
index 98b8b44..df0bbd9 100644
--- a/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccLiveLocalArchive.groovy
+++ b/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccLiveLocalArchive.groovy
@@ -36,11 +36,11 @@
@Test
public void testAccLiveLocalArchive() throws Throwable {
- String clustername = "test_acc_live_local_archive"
int tablets = 1
int monitor = 1
int gc = 1
- createMiniCluster(clustername, getConfiguration(), 1, 1, 1, true, false)
+ String clustername = createMiniCluster(
+ "", configuration, 1, 1, 1, true, false)
describe(" Create an accumulo cluster from an archive");
//image mode
diff --git a/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccM1T1GC1Mon1.groovy b/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccM1T1GC1Mon1.groovy
index 7074294..d94eb36 100644
--- a/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccM1T1GC1Mon1.groovy
+++ b/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccM1T1GC1Mon1.groovy
@@ -35,11 +35,10 @@
@Test
public void testAccM1T1GC1Mon1() throws Throwable {
- String clustername = "test_acc_m1t1gc1mon1"
int tablets = 1
int monitor = 1
int gc = 1
- createMiniCluster(clustername, getConfiguration(), 1, 1, 1, true, false)
+ String clustername = createMiniCluster( "", configuration, 1, 1, 1, true, false)
describe(" Create an accumulo cluster");
//make sure that ZK is up and running at the binding string
diff --git a/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccM2T2GC1Mon1.groovy b/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccM2T2GC1Mon1.groovy
index 253192b..2ed50f1 100644
--- a/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccM2T2GC1Mon1.groovy
+++ b/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccM2T2GC1Mon1.groovy
@@ -36,12 +36,12 @@
@Test
public void testAccM1T1GC1Mon1() throws Throwable {
- String clustername = "test_acc_m2t2gc1mon1"
int master = 2
int tablets = 2
int monitor = 1
int gc = 1
- createMiniCluster(clustername, getConfiguration(), 1, 1, 1, true, false)
+ String clustername = createMiniCluster(
+ "", configuration, 1, 1, 1, true, false)
describe(" Create an accumulo cluster");
//make sure that ZK is up and running at the binding string
diff --git a/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccumuloAMWebApp.groovy b/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccumuloAMWebApp.groovy
index df40df7..4596b12 100644
--- a/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccumuloAMWebApp.groovy
+++ b/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccumuloAMWebApp.groovy
@@ -38,11 +38,11 @@
@Test
public void testAccumuloAMWebApp() throws Throwable {
- String clustername = "test_accumulo_am_webapp"
int tablets = 1
int monitor = 1
int gc = 1
- createMiniCluster(clustername, getConfiguration(), 1, 1, 1, true, false)
+ String clustername = createMiniCluster( "",
+ configuration, 1, 1, 1, true, false)
describe(" Create an accumulo cluster");
//make sure that ZK is up and running at the binding string
diff --git a/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestInvalidMonitorAddress.groovy b/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestInvalidMonitorAddress.groovy
index d86a158..dca0c6b 100644
--- a/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestInvalidMonitorAddress.groovy
+++ b/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestInvalidMonitorAddress.groovy
@@ -38,8 +38,7 @@
@Test
public void testInvalidMonitorAddress() throws Throwable {
- String clustername = "test_invalid_monitor_address"
- createMiniCluster(clustername, configuration, 1, true)
+ String clustername = createMiniCluster("", configuration, 1, true)
describe "verify that bad Java heap options are picked up"
diff --git a/slider-providers/hbase/hbase-funtests/pom.xml b/slider-providers/hbase/hbase-funtests/pom.xml
index f5a155b..857ef03 100644
--- a/slider-providers/hbase/hbase-funtests/pom.xml
+++ b/slider-providers/hbase/hbase-funtests/pom.xml
@@ -27,7 +27,7 @@
<parent>
<groupId>org.apache.slider</groupId>
<artifactId>slider</artifactId>
- <version>0.40</version>
+ <version>0.50.0-incubating</version>
<relativePath>../../../</relativePath>
</parent>
@@ -59,14 +59,6 @@
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
- <version>${maven-compiler-plugin.version}</version>
- <configuration>
- <compilerId>groovy-eclipse-compiler</compilerId>
- <!-- set verbose to be true if you want lots of uninteresting messages -->
- <!-- <verbose>true</verbose> -->
- <source>${project.java.src.version}</source>
- <target>${project.java.src.version}</target>
- </configuration>
<dependencies>
<dependency>
<groupId>org.codehaus.groovy</groupId>
@@ -85,8 +77,17 @@
<!-- functional test -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-surefire-plugin</artifactId>
- <version>${maven-surefire-plugin.version}</version>
+ <artifactId>maven-failsafe-plugin</artifactId>
+ <version>${maven-failsafe-plugin.version}</version>
+ <executions>
+ <execution>
+ <id>run-integration-tests</id>
+ <goals>
+ <goal>integration-test</goal>
+ <goal>verify</goal>
+ </goals>
+ </execution>
+ </executions>
<configuration>
<!--mvn process fork options-->
@@ -107,15 +108,9 @@
<java.security.krb5.realm>${slider.test.java.security.krb5.realm}</java.security.krb5.realm>
<java.security.krb5.kdc>${slider.test.java.security.krb5.kdc}</java.security.krb5.kdc>
<!-- this property must be supplied-->
- <slider.conf.dir>../../../src/test/clusters/offline/slider</slider.conf.dir>
+ <slider.conf.dir>${slider.conf.dir}</slider.conf.dir>
<slider.bin.dir>../../../slider-assembly/target/slider-${project.version}-all/slider-${project.version}</slider.bin.dir>
</systemPropertyVariables>
- <includes>
- <include>**/Test*.java</include>
- </includes>
- <excludes>
- <exclude>**/Test*$*.java</exclude>
- </excludes>
</configuration>
</plugin>
@@ -145,6 +140,11 @@
<dependency>
<groupId>org.apache.slider</groupId>
<artifactId>slider-core</artifactId>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.slider</groupId>
+ <artifactId>slider-core</artifactId>
<type>test-jar</type>
<scope>test</scope>
</dependency>
diff --git a/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/DemoHBaseCluster.groovy b/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/DemoHBaseCluster.groovy
index 9385cc9..1bef7d3 100644
--- a/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/DemoHBaseCluster.groovy
+++ b/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/DemoHBaseCluster.groovy
@@ -22,7 +22,7 @@
import org.apache.slider.api.ClusterDescription
import org.apache.slider.client.SliderClient
-class DemoHBaseCluster extends TestFunctionalHBaseCluster {
+class DemoHBaseCluster extends FunctionalHBaseClusterIT {
@Override
diff --git a/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestFunctionalHBaseCluster.groovy b/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/FunctionalHBaseClusterIT.groovy
similarity index 98%
rename from slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestFunctionalHBaseCluster.groovy
rename to slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/FunctionalHBaseClusterIT.groovy
index 3415b35..84e55f7 100644
--- a/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestFunctionalHBaseCluster.groovy
+++ b/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/FunctionalHBaseClusterIT.groovy
@@ -49,7 +49,7 @@
@CompileStatic
@Slf4j
-public class TestFunctionalHBaseCluster extends HBaseCommandTestBase
+public class FunctionalHBaseClusterIT extends HBaseCommandTestBase
implements FuntestProperties, Arguments, SliderExitCodes {
diff --git a/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestHBaseBuildSetup.groovy b/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/HBaseBuildSetupIT.groovy
similarity index 91%
rename from slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestHBaseBuildSetup.groovy
rename to slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/HBaseBuildSetupIT.groovy
index c8f3be3..0dcffde 100644
--- a/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestHBaseBuildSetup.groovy
+++ b/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/HBaseBuildSetupIT.groovy
@@ -22,12 +22,11 @@
import org.apache.slider.funtest.abstracttests.AbstractTestBuildSetup
import org.junit.Test
-class TestHBaseBuildSetup extends AbstractTestBuildSetup {
+class HBaseBuildSetupIT extends AbstractTestBuildSetup {
@Test
public void testHBaseBuildsHavePathsDefined() throws Throwable {
Configuration conf = loadSliderConf();
- assumeBoolOption(conf, KEY_SLIDER_FUNTESTS_ENABLED, true)
assumeBoolOption(conf, KEY_TEST_HBASE_ENABLED, true)
diff --git a/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestHBaseClusterBuildDestroy.groovy b/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/HBaseClusterBuildDestroyIT.groovy
similarity index 95%
rename from slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestHBaseClusterBuildDestroy.groovy
rename to slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/HBaseClusterBuildDestroyIT.groovy
index 3c9b8ed..3a44e30 100644
--- a/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestHBaseClusterBuildDestroy.groovy
+++ b/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/HBaseClusterBuildDestroyIT.groovy
@@ -34,7 +34,7 @@
@CompileStatic
@Slf4j
-public class TestHBaseClusterBuildDestroy extends HBaseCommandTestBase
+public class HBaseClusterBuildDestroyIT extends HBaseCommandTestBase
implements FuntestProperties, Arguments {
@@ -43,7 +43,6 @@
@BeforeClass
public static void prepareCluster() {
- assumeFunctionalTestsEnabled();
setupCluster(CLUSTER)
}
diff --git a/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestHBaseClusterLifecycle.groovy b/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/HBaseClusterLifecycleIT.groovy
similarity index 91%
rename from slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestHBaseClusterLifecycle.groovy
rename to slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/HBaseClusterLifecycleIT.groovy
index 01c7131..63b5fb6 100644
--- a/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestHBaseClusterLifecycle.groovy
+++ b/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/HBaseClusterLifecycleIT.groovy
@@ -34,7 +34,7 @@
@CompileStatic
@Slf4j
-public class TestHBaseClusterLifecycle extends HBaseCommandTestBase
+public class HBaseClusterLifecycleIT extends HBaseCommandTestBase
implements FuntestProperties, Arguments, SliderExitCodes {
@@ -118,11 +118,13 @@
log.info("Connected via Client {}", sliderClient.toString())
//freeze
- freeze(CLUSTER, [
+ def frozen = freeze(0, CLUSTER, [
ARG_WAIT, Integer.toString(FREEZE_WAIT_TIME),
- ARG_MESSAGE, "freeze-in-test cluster lifecycle"
+ ARG_MESSAGE, "freeze-in-test-cluster-lifecycle"
])
+ frozen.assertExitCode(0)
+// sleep(FREEZE_WAIT_TIME)
//cluster exists if you don't want it to be live
exists(0, CLUSTER, false)
// condition returns false if it is required to be live
@@ -136,7 +138,7 @@
ARG_WAIT, Integer.toString(THAW_WAIT_TIME),
])
exists(0, CLUSTER)
- freeze(CLUSTER,
+ freeze(0, CLUSTER,
[
ARG_FORCE,
ARG_WAIT, Integer.toString(FREEZE_WAIT_TIME),
@@ -165,7 +167,13 @@
StatusKeys.INFO_CONTAINERS_AM_RESTART)
assert restarted != null
assert Integer.parseInt(restarted) == 0
- freeze(CLUSTER)
+ freeze(0, CLUSTER,
+ [
+ ARG_FORCE,
+ ARG_WAIT, Integer.toString(FREEZE_WAIT_TIME),
+ ARG_MESSAGE, "teardown-freeze"
+ ])
+
destroy(0, CLUSTER)
diff --git a/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/HBaseCommandTestBase.groovy b/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/HBaseCommandTestBase.groovy
index 8bad590..caaab04 100644
--- a/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/HBaseCommandTestBase.groovy
+++ b/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/HBaseCommandTestBase.groovy
@@ -52,7 +52,6 @@
@BeforeClass
public static void extendClasspath() {
- assumeFunctionalTestsEnabled()
addExtraJar(HBaseClientProvider)
}
@@ -65,7 +64,6 @@
public void assumeHBaseTestsEnabled() {
- assumeFunctionalTestsEnabled()
assume(HBASE_TESTS_ENABLED, "HBase tests disabled")
}
diff --git a/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestHBaseIntegration.groovy b/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/HBaseIntegrationIT.groovy
similarity index 91%
rename from slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestHBaseIntegration.groovy
rename to slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/HBaseIntegrationIT.groovy
index 4bf18f6..21a7494 100644
--- a/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestHBaseIntegration.groovy
+++ b/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/HBaseIntegrationIT.groovy
@@ -24,14 +24,17 @@
import org.apache.hadoop.util.ToolRunner
import org.apache.slider.api.ClusterDescription
import org.apache.slider.client.SliderClient
-import org.apache.slider.providers.hbase.HBaseConfigFileOptions;
+import org.apache.slider.providers.hbase.HBaseConfigFileOptions
+import org.junit.Ignore;
/* Runs IntegrationTestIngest on cluster
*
* Note: this test runs for about 20 minutes
* please set slider.test.timeout.seconds accordingly
*/
-class TestHBaseIntegration extends TestFunctionalHBaseCluster {
+
+@Ignore("appears localhost only")
+class HBaseIntegrationIT extends FunctionalHBaseClusterIT {
@Override
String getClusterName() {
diff --git a/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestHBaseLoad.groovy b/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/HBaseLoadIT.groovy
similarity index 90%
rename from slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestHBaseLoad.groovy
rename to slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/HBaseLoadIT.groovy
index 3d6c46c..61bcc70 100644
--- a/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestHBaseLoad.groovy
+++ b/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/HBaseLoadIT.groovy
@@ -23,8 +23,10 @@
import org.apache.slider.api.ClusterDescription
import org.apache.slider.client.SliderClient
import org.apache.slider.providers.hbase.HBaseConfigFileOptions
+import org.junit.Assume
+import org.junit.Ignore
-class TestHBaseLoad extends TestFunctionalHBaseCluster {
+class HBaseLoadIT extends FunctionalHBaseClusterIT {
@Override
String getClusterName() {
@@ -32,6 +34,11 @@
}
@Override
+ void testHBaseCreateCluster() throws Throwable {
+ super.testHBaseCreateCluster()
+ }
+
+ @Override
void clusterOperations(
String clustername,
SliderClient sliderClient,
diff --git a/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestHBaseNodeFailure.groovy b/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/HBaseNodeFailureIT.groovy
similarity index 98%
rename from slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestHBaseNodeFailure.groovy
rename to slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/HBaseNodeFailureIT.groovy
index fab73b3..cd87fab 100644
--- a/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestHBaseNodeFailure.groovy
+++ b/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/HBaseNodeFailureIT.groovy
@@ -27,7 +27,7 @@
import org.apache.slider.providers.hbase.HBaseKeys
import org.apache.slider.providers.hbase.HBaseTestUtils
-class TestHBaseNodeFailure extends TestFunctionalHBaseCluster {
+class HBaseNodeFailureIT extends FunctionalHBaseClusterIT {
public static final int RESTART_SLEEP_TIME = 5000
diff --git a/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestImages.groovy b/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/ImagesIT.groovy
similarity index 92%
rename from slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestImages.groovy
rename to slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/ImagesIT.groovy
index 128e087..d1b5c55 100644
--- a/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestImages.groovy
+++ b/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/ImagesIT.groovy
@@ -26,12 +26,11 @@
import org.junit.Before
import org.junit.Test
-class TestImages extends CommandTestBase implements FuntestProperties {
+class ImagesIT extends CommandTestBase implements FuntestProperties {
@Before
public void verifyPreconditions() {
- assumeBoolOption(SLIDER_CONFIG, KEY_SLIDER_FUNTESTS_ENABLED, true)
assumeBoolOption(SLIDER_CONFIG, KEY_TEST_HBASE_ENABLED, true)
}
diff --git a/slider-providers/hbase/slider-hbase-provider/pom.xml b/slider-providers/hbase/slider-hbase-provider/pom.xml
index 381a1b6..b93ce14 100644
--- a/slider-providers/hbase/slider-hbase-provider/pom.xml
+++ b/slider-providers/hbase/slider-hbase-provider/pom.xml
@@ -29,7 +29,7 @@
<parent>
<groupId>org.apache.slider</groupId>
<artifactId>slider</artifactId>
- <version>0.40</version>
+ <version>0.50.0-incubating</version>
<relativePath>../../../</relativePath>
</parent>
@@ -47,14 +47,6 @@
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
- <version>${maven-compiler-plugin.version}</version>
- <configuration>
- <compilerId>groovy-eclipse-compiler</compilerId>
- <!-- set verbose to be true if you want lots of uninteresting messages -->
- <!-- <verbose>true</verbose> -->
- <source>${project.java.src.version}</source>
- <target>${project.java.src.version}</target>
- </configuration>
<dependencies>
<dependency>
<groupId>org.codehaus.groovy</groupId>
diff --git a/slider-providers/hbase/slider-hbase-provider/src/main/java/org/apache/slider/providers/hbase/HBaseClientProvider.java b/slider-providers/hbase/slider-hbase-provider/src/main/java/org/apache/slider/providers/hbase/HBaseClientProvider.java
index c40c5f2..9ad872f 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/main/java/org/apache/slider/providers/hbase/HBaseClientProvider.java
+++ b/slider-providers/hbase/slider-hbase-provider/src/main/java/org/apache/slider/providers/hbase/HBaseClientProvider.java
@@ -21,6 +21,7 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.slider.api.InternalKeys;
import org.apache.slider.common.SliderKeys;
import org.apache.slider.common.SliderXmlConfKeys;
import org.apache.slider.api.OptionKeys;
@@ -107,7 +108,7 @@
MapOperations worker = appconf.getMandatoryComponent(HBaseKeys.ROLE_WORKER);
- Map<String, String> sitexml = new HashMap<>();
+ Map<String, String> sitexml = new HashMap<String, String>();
//map all cluster-wide site. options
providerUtils.propagateSiteOptions(globalAppOptions, sitexml);
@@ -122,7 +123,7 @@
sitexml.put(KEY_HBASE_ROOTDIR,
globalInstanceOptions.getMandatoryOption(
- OptionKeys.INTERNAL_DATA_DIR_PATH)
+ InternalKeys.INTERNAL_DATA_DIR_PATH)
);
providerUtils.propagateOption(globalAppOptions, OptionKeys.ZOOKEEPER_PATH,
sitexml, KEY_ZNODE_PARENT);
@@ -209,7 +210,7 @@
}
}
- private static Set<String> knownRoleNames = new HashSet<>();
+ private static Set<String> knownRoleNames = new HashSet<String>();
static {
List<ProviderRole> roles = HBaseRoles.getRoles();
knownRoleNames.add(SliderKeys.COMPONENT_AM);
@@ -239,6 +240,12 @@
providerUtils.validateNodeCount(instanceDefinition, HBaseKeys.ROLE_MASTER,
0, -1);
+ providerUtils.validateNodeCount(instanceDefinition, HBaseKeys.ROLE_REST_GATEWAY,
+ 0, -1);
+ providerUtils.validateNodeCount(instanceDefinition, HBaseKeys.ROLE_THRIFT_GATEWAY,
+ 0, -1);
+ providerUtils.validateNodeCount(instanceDefinition, HBaseKeys.ROLE_THRIFT2_GATEWAY,
+ 0, -1);
}
@Override
@@ -256,7 +263,7 @@
// add any and all dependency files
Map<String, LocalResource> providerResources =
- new HashMap<>();
+ new HashMap<String, LocalResource>();
ProviderUtils.addProviderJar(providerResources,
this,
diff --git a/slider-providers/hbase/slider-hbase-provider/src/main/java/org/apache/slider/providers/hbase/HBaseKeys.java b/slider-providers/hbase/slider-hbase-provider/src/main/java/org/apache/slider/providers/hbase/HBaseKeys.java
index 1d6ca70..2a20438 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/main/java/org/apache/slider/providers/hbase/HBaseKeys.java
+++ b/slider-providers/hbase/slider-hbase-provider/src/main/java/org/apache/slider/providers/hbase/HBaseKeys.java
@@ -26,9 +26,23 @@
String ROLE_MASTER = MASTER;
+ String ROLE_REST_GATEWAY = "REST";
+
+ String ROLE_THRIFT_GATEWAY = "THRIFT";
+
+ String ROLE_THRIFT2_GATEWAY = "THRIFT2";
+
/** {@value */
String REGION_SERVER = "regionserver";
+ /** {@value} */
+ String REST_GATEWAY = "rest";
+
+ /** {@value} */
+ String THRIFT_GATEWAY = "thrift";
+ /** {@value} */
+ String THRIFT2_GATEWAY = "thrift2";
+
/**
* What is the command for hbase to print a version: {@value}
*/
diff --git a/slider-providers/hbase/slider-hbase-provider/src/main/java/org/apache/slider/providers/hbase/HBaseProviderService.java b/slider-providers/hbase/slider-hbase-provider/src/main/java/org/apache/slider/providers/hbase/HBaseProviderService.java
index f9a5628..82e535f 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/main/java/org/apache/slider/providers/hbase/HBaseProviderService.java
+++ b/slider-providers/hbase/slider-hbase-provider/src/main/java/org/apache/slider/providers/hbase/HBaseProviderService.java
@@ -22,9 +22,9 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.slider.api.InternalKeys;
import org.apache.slider.common.SliderKeys;
import org.apache.slider.api.ClusterDescription;
-import org.apache.slider.api.OptionKeys;
import org.apache.slider.api.RoleKeys;
import org.apache.slider.api.StatusKeys;
import org.apache.slider.core.conf.AggregateConf;
@@ -68,11 +68,8 @@
* This class implements the server-side aspects
* of an HBase Cluster
*/
-public class HBaseProviderService extends AbstractProviderService implements
- ProviderCore,
- HBaseKeys,
- SliderKeys,
- AgentRestOperations{
+public class HBaseProviderService extends AbstractProviderService
+ implements ProviderCore, HBaseKeys, SliderKeys, AgentRestOperations{
protected static final Logger log =
LoggerFactory.getLogger(HBaseProviderService.class);
@@ -109,8 +106,8 @@
* @param instanceDefinition the instance definition to validate
*/
@Override // Client and Server
- public void validateInstanceDefinition(AggregateConf instanceDefinition) throws
- SliderException {
+ public void validateInstanceDefinition(AggregateConf instanceDefinition)
+ throws SliderException {
clientProvider.validateInstanceDefinition(instanceDefinition);
}
@@ -152,7 +149,7 @@
//Add binaries
//now add the image if it was set
String imageURI = instanceDefinition.getInternalOperations()
- .get(OptionKeys.INTERNAL_APPLICATION_IMAGE_PATH);
+ .get(InternalKeys.INTERNAL_APPLICATION_IMAGE_PATH);
coreFS.maybeAddImagePath(launcher.getLocalResources(), imageURI);
CommandLineBuilder cli = new CommandLineBuilder();
@@ -180,6 +177,8 @@
String roleCommand;
String logfile;
//now look at the role
+
+/* JDK7
switch (role) {
case ROLE_WORKER:
//role is region server
@@ -191,10 +190,52 @@
logfile = "/master.txt";
break;
+ case ROLE_REST_GATEWAY:
+ roleCommand = REST_GATEWAY;
+
+ logfile = "/rest-gateway.txt";
+ break;
+ case ROLE_THRIFT_GATEWAY:
+ roleCommand = THRIFT_GATEWAY;
+
+ logfile = "/thrift-gateway.txt";
+ break;
+ case ROLE_THRIFT2_GATEWAY:
+ roleCommand = THRIFT2_GATEWAY;
+
+ logfile = "/thrift2-gateway.txt";
+ break;
default:
throw new SliderInternalStateException("Cannot start role %s", role);
}
+*/
+ if (ROLE_WORKER.equals(role)) {
+ //role is region server
+ roleCommand = REGION_SERVER;
+ logfile = "/region-server.txt";
+
+ } else if (ROLE_MASTER.equals(role)) {
+ roleCommand = MASTER;
+ logfile = "/master.txt";
+
+ } else if (ROLE_REST_GATEWAY.equals(role)) {
+ roleCommand = REST_GATEWAY;
+ logfile = "/rest-gateway.txt";
+
+ } else if (ROLE_THRIFT_GATEWAY.equals(role)) {
+ roleCommand = THRIFT_GATEWAY;
+ logfile = "/thrift-gateway.txt";
+
+ } else if (ROLE_THRIFT2_GATEWAY.equals(role)) {
+ roleCommand = THRIFT2_GATEWAY;
+ logfile = "/thrift2-gateway.txt";
+ }
+
+ else {
+ throw new SliderInternalStateException("Cannot start role %s", role);
+ }
+
cli.add(roleCommand);
cli.add(ACTION_START);
//log details
@@ -304,7 +345,7 @@
* @return the provider status - map of entries to add to the info section
*/
public Map<String, String> buildProviderStatus() {
- Map<String, String> stats = new HashMap<>();
+ Map<String, String> stats = new HashMap<String, String>();
return stats;
}
diff --git a/slider-providers/hbase/slider-hbase-provider/src/main/java/org/apache/slider/providers/hbase/HBaseRoles.java b/slider-providers/hbase/slider-hbase-provider/src/main/java/org/apache/slider/providers/hbase/HBaseRoles.java
index 01776f7..b2825b8 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/main/java/org/apache/slider/providers/hbase/HBaseRoles.java
+++ b/slider-providers/hbase/slider-hbase-provider/src/main/java/org/apache/slider/providers/hbase/HBaseRoles.java
@@ -37,12 +37,20 @@
public static final int KEY_MASTER = SliderKeys.ROLE_AM_PRIORITY_INDEX + 2;
+ public static final int KEY_REST_GATEWAY = SliderKeys.ROLE_AM_PRIORITY_INDEX + 3;
+
+ public static final int KEY_THRIFT_GATEWAY = SliderKeys.ROLE_AM_PRIORITY_INDEX + 4;
+
+ public static final int KEY_THRIFT2_GATEWAY = SliderKeys.ROLE_AM_PRIORITY_INDEX + 5;
/**
* Initialize role list
*/
static {
ROLES.add(new ProviderRole(HBaseKeys.ROLE_WORKER, KEY_WORKER));
ROLES.add(new ProviderRole(HBaseKeys.ROLE_MASTER, KEY_MASTER));
+ ROLES.add(new ProviderRole(HBaseKeys.ROLE_REST_GATEWAY, KEY_REST_GATEWAY));
+ ROLES.add(new ProviderRole(HBaseKeys.ROLE_THRIFT_GATEWAY, KEY_THRIFT_GATEWAY));
+ ROLES.add(new ProviderRole(HBaseKeys.ROLE_THRIFT2_GATEWAY, KEY_THRIFT2_GATEWAY));
}
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/HBaseMiniClusterTestBase.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/HBaseMiniClusterTestBase.groovy
index 3f49771..6a69e17 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/HBaseMiniClusterTestBase.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/HBaseMiniClusterTestBase.groovy
@@ -26,7 +26,6 @@
import org.apache.slider.api.ClusterDescription
import org.apache.slider.api.ClusterNode
import org.apache.slider.api.ResourceKeys
-import org.apache.slider.providers.hbase.HBaseKeys
import org.apache.slider.client.SliderClient
import org.apache.slider.core.main.ServiceLauncher
import org.apache.slider.providers.hbase.HBaseTestUtils
@@ -214,9 +213,7 @@
(ROLE_MASTER): masters,
(ROLE_WORKER): workers,
];
- extraArgs << ARG_RES_COMP_OPT << ROLE_MASTER << ResourceKeys.YARN_MEMORY << YRAM
- extraArgs << ARG_RES_COMP_OPT << ROLE_WORKER << ResourceKeys.YARN_MEMORY << YRAM
- extraArgs << ARG_PROVIDER << PROVIDER_HBASE;
+ hbaseArgs(extraArgs)
return createCluster(clustername,
roles,
@@ -226,6 +223,15 @@
[:])
}
+ public List<String> hbaseArgs(List<String> extraArgs) {
+ extraArgs << ARG_RES_COMP_OPT << ROLE_MASTER << ResourceKeys.YARN_MEMORY <<
+ YRAM
+ extraArgs << ARG_RES_COMP_OPT << ROLE_WORKER << ResourceKeys.YARN_MEMORY <<
+ YRAM
+ extraArgs << ARG_PROVIDER << PROVIDER_HBASE;
+ return extraArgs;
+ }
+
/**
* Create an AM without a master
* @param clustername AM name
@@ -241,9 +247,7 @@
];
return createCluster(clustername,
roles,
- [
- ARG_PROVIDER, PROVIDER_HBASE
- ],
+ hbaseArgs([]),
deleteExistingData,
blockUntilRunning,
[:])
@@ -278,62 +282,94 @@
int masters,
int masterFlexTarget,
int workers,
- int flexTarget,
+ int workerFlexTarget,
boolean testHBaseAfter) {
- createMiniCluster(clustername, configuration,
- 1,
- true);
+ clustername = buildClustername(clustername);
+ SliderClient sliderClient = startHBaseCluster(clustername, masters, workers)
+
+ //now flex
+ return flexCluster(
+ sliderClient,
+ clustername,
+ masterFlexTarget,
+ workerFlexTarget,
+ testHBaseAfter)
+
+ }
+
+ public SliderClient startHBaseCluster(
+ String clustername,
+ int masters,
+ int workers) {
+ clustername = createMiniCluster(clustername, configuration,
+ 1,
+ true);
//now launch the cluster
SliderClient sliderClient;
ServiceLauncher<SliderClient> launcher = createCluster(clustername,
- [
- (ROLE_MASTER): masters,
- (ROLE_WORKER): workers,
- ],
- [
- ARG_RES_COMP_OPT , ROLE_MASTER, ResourceKeys.YARN_MEMORY, YRAM,
- ARG_RES_COMP_OPT , ROLE_WORKER, ResourceKeys.YARN_MEMORY, YRAM,
- ARG_PROVIDER , PROVIDER_HBASE
- ],
- true,
- true,
- [:]);
+ [
+ (ROLE_MASTER): masters,
+ (ROLE_WORKER): workers,
+ ],
+ hbaseArgs([]),
+ true,
+ true,
+ [:]);
sliderClient = launcher.service;
- try {
- basicHBaseClusterStartupSequence(sliderClient);
- describe("Waiting for initial worker count of $workers");
+ basicHBaseClusterStartupSequence(sliderClient);
- //verify the #of roles is as expected
- //get the hbase status
- waitForWorkerInstanceCount(sliderClient, workers, hbaseClusterStartupToLiveTime);
- waitForSliderMasterCount(sliderClient, masters, hbaseClusterStartupToLiveTime);
+ describe("Waiting for initial worker count of $workers");
- log.info("Slider worker count at $workers, waiting for region servers to match");
- waitForHBaseRegionServerCount(sliderClient, clustername, workers, hbaseClusterStartupToLiveTime);
+ //verify the #of roles is as expected
+ //get the hbase status
+ waitForWorkerInstanceCount(
+ sliderClient,
+ workers,
+ hbaseClusterStartupToLiveTime);
+ waitForSliderMasterCount(
+ sliderClient,
+ masters,
+ hbaseClusterStartupToLiveTime);
- //now flex
- describe("Flexing masters:$masters -> $masterFlexTarget ; workers $workers -> $flexTarget");
- boolean flexed;
- flexed = 0 == sliderClient.flex(clustername,
- [
- (ROLE_WORKER): flexTarget,
- (ROLE_MASTER): masterFlexTarget
- ]
- );
- waitForWorkerInstanceCount(sliderClient, flexTarget, hbaseClusterStartupToLiveTime);
- waitForSliderMasterCount(sliderClient, masterFlexTarget,
- hbaseClusterStartupToLiveTime);
+ log.info(
+ "Slider worker count at $workers, waiting for region servers to match");
+ waitForHBaseRegionServerCount(
+ sliderClient,
+ clustername,
+ workers,
+ hbaseClusterStartupToLiveTime);
+ sliderClient
+ }
- if (testHBaseAfter) {
- waitForHBaseRegionServerCount(sliderClient, clustername, flexTarget,
- hbaseClusterStartupToLiveTime);
- }
- return flexed;
- } finally {
- maybeStopCluster(sliderClient, null, "end of flex test run");
+ public boolean flexCluster(
+ SliderClient sliderClient,
+ String clustername,
+ int masterFlexTarget,
+ int workerFlexTarget,
+ boolean testHBaseAfter) {
+ int flexTarget
+ describe(
+ "Flexing masters -> $masterFlexTarget ; workers -> ${workerFlexTarget}");
+ boolean flexed;
+ flexed = 0 == sliderClient.flex(clustername,
+ [
+ (ROLE_WORKER): workerFlexTarget,
+ (ROLE_MASTER): masterFlexTarget
+ ]
+ );
+ waitForWorkerInstanceCount(
+ sliderClient,
+ workerFlexTarget,
+ hbaseClusterStartupToLiveTime);
+ waitForSliderMasterCount(sliderClient, masterFlexTarget,
+ hbaseClusterStartupToLiveTime);
+
+ if (testHBaseAfter) {
+ waitForHBaseRegionServerCount(sliderClient, clustername, workerFlexTarget,
+ hbaseClusterStartupToLiveTime);
}
-
+ flexed
}
/**
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/archives/TestFreezeThawClusterFromArchive.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/archives/TestFreezeThawClusterFromArchive.groovy
index c9e0b24..85726a7 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/archives/TestFreezeThawClusterFromArchive.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/archives/TestFreezeThawClusterFromArchive.groovy
@@ -37,9 +37,8 @@
@Test
public void testFreezeThawClusterFromArchive() throws Throwable {
- String clustername = "test_freeze_thaw_cluster_from_archive"
int regionServerCount = 1
- createMiniCluster(clustername, configuration, 1, true)
+ String clustername = createMiniCluster("", configuration, 1, true)
switchToImageDeploy = true
ServiceLauncher<SliderClient> launcher = createHBaseCluster(clustername, regionServerCount, [], true, true)
SliderClient sliderClient = launcher.service
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/archives/TestLiveClusterFromArchive.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/archives/TestLiveClusterFromArchive.groovy
index e5bfb66..22fa4c7 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/archives/TestLiveClusterFromArchive.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/archives/TestLiveClusterFromArchive.groovy
@@ -37,8 +37,8 @@
@Test
public void testLiveClusterFromArchive() throws Throwable {
- String clustername = testClusterName
int regionServerCount = 1
+ String clustername = testClusterName
createMiniCluster(clustername,
configuration,
regionServerCount + 1,
@@ -66,7 +66,7 @@
}
public String getTestClusterName() {
- return "test_live_cluster_from_archive"
+ return "testliveclusterfromarchive"
}
public boolean startHDFS() {
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/archives/TestLiveClusterFromArchiveOnHDFS.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/archives/TestLiveClusterFromArchiveOnHDFS.groovy
index 2245e2c..cecee3f 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/archives/TestLiveClusterFromArchiveOnHDFS.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/archives/TestLiveClusterFromArchiveOnHDFS.groovy
@@ -30,7 +30,7 @@
@Override
String getTestClusterName() {
- "test_live_cluster_from_archiveonhdfs"
+ "testliveclusterfromarchiveonhdfs"
}
@Override
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/build/TestBuildThawClusterM1W1.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/build/TestBuildThawClusterM1W1.groovy
index a2af619..c305b5b 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/build/TestBuildThawClusterM1W1.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/build/TestBuildThawClusterM1W1.groovy
@@ -38,8 +38,7 @@
@Test
public void test_build_thaw_cluster_m1_w1() throws Throwable {
- String clustername = "test_build_thaw_cluster_m1_w1"
- createMiniCluster(clustername, configuration, 1, true)
+ String clustername = createMiniCluster("", configuration, 1, true)
describe "verify that a built cluster can be thawed"
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/failures/TestFailedRegionService.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/failures/TestFailedRegionService.groovy
index 16cdef6..fe739c4 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/failures/TestFailedRegionService.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/failures/TestFailedRegionService.groovy
@@ -39,19 +39,18 @@
@Test
public void testFailedRegionService() throws Throwable {
- testRegionService("test_failed_region_service", true)
+ testRegionService("", true)
}
@Test
public void testStoppedRegionService() throws Throwable {
- testRegionService("test_stopped_region_service", false)
+ testRegionService("", false)
}
private void testRegionService(String testName, boolean toKill) {
- String clustername = testName
String action = toKill ? "kill" : "stop"
int regionServerCount = 2
- createMiniCluster(clustername, configuration, 1, 1, 1, true, true)
+ String clustername = createMiniCluster(testName, configuration, 1, 1, 1, true, true)
describe("Create a single region service cluster then " + action + " the RS");
//now launch the cluster
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/failures/TestKilledHBaseAM.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/failures/TestKilledHBaseAM.groovy
index 2237c5d..d219636 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/failures/TestKilledHBaseAM.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/failures/TestKilledHBaseAM.groovy
@@ -49,7 +49,6 @@
public void testKilledHBaseAM() throws Throwable {
skip("SLIDER-66: AM Restart Failing -YARN issues")
- String clustername = "test_killed_hbase_am"
int regionServerCount = 1
@@ -58,7 +57,7 @@
conf.setInt(SliderXmlConfKeys.KEY_AM_RESTART_LIMIT, 3)
conf.set(YarnConfiguration.RM_SCHEDULER, FIFO_SCHEDULER);
- createMiniCluster(clustername, conf, 1, 1, 1, true, false)
+ String clustername = createMiniCluster("", conf, 1, 1, 1, true, false)
describe(" Kill the AM, expect cluster to die");
//now launch the cluster
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/failures/TestKilledHBaseMaster.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/failures/TestKilledHBaseMaster.groovy
index 35ed129..1e19d71 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/failures/TestKilledHBaseMaster.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/failures/TestKilledHBaseMaster.groovy
@@ -40,9 +40,9 @@
@Test
public void testKilledHBaseMaster() throws Throwable {
- String clustername = "test_killed_hbase_master"
int regionServerCount = 1
- createMiniCluster(clustername, configuration, 1, 1, 1, true, true)
+ String clustername = createMiniCluster(
+ "", configuration, 1, 1, 1, true, true)
describe("Kill the hbase master and expect a restart");
//now launch the cluster
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/failures/TestFailureThreshold.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/failures/TestRegionServerFailureThreshold.groovy
similarity index 65%
rename from slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/failures/TestFailureThreshold.groovy
rename to slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/failures/TestRegionServerFailureThreshold.groovy
index e43ad81..eb44ae0 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/failures/TestFailureThreshold.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/failures/TestRegionServerFailureThreshold.groovy
@@ -23,10 +23,10 @@
import org.apache.hadoop.hbase.ClusterStatus
import org.apache.hadoop.yarn.api.records.ApplicationReport
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus
+import org.apache.slider.api.ResourceKeys
import org.apache.slider.core.main.ServiceLauncher
import org.apache.slider.common.SliderExitCodes
import org.apache.slider.api.ClusterDescription
-import org.apache.slider.api.OptionKeys
import org.apache.slider.core.exceptions.BadClusterStateException
import org.apache.slider.core.exceptions.ErrorStrings
import org.apache.slider.common.params.Arguments
@@ -34,6 +34,8 @@
import org.apache.slider.providers.hbase.minicluster.HBaseMiniClusterTestBase
import org.junit.Test
+import static org.apache.slider.providers.hbase.HBaseKeys.ROLE_WORKER
+
/**
* test that if a container is killed too many times,
* the AM stays down
@@ -41,39 +43,61 @@
@CompileStatic
@Slf4j
-class TestFailureThreshold extends HBaseMiniClusterTestBase {
+class TestRegionServerFailureThreshold extends HBaseMiniClusterTestBase {
@Test
- public void testFailedRegionService() throws Throwable {
- failureThresholdTestRun("test_failure_threshold", true, 2, 5)
+ public void testRegionServerFailureThreshold() throws Throwable {
+ failureThresholdTestRun("", true, 2, 5)
}
-
-
+ /**
+ * Sets the failure threshold then runs the #of kill attempts
+ * @param testName
+ * @param toKill
+ * @param threshold
+ * @param killAttempts
+ */
private void failureThresholdTestRun(
String testName,
boolean toKill,
int threshold,
int killAttempts) {
- String clustername = testName
String action = toKill ? "kill" : "stop"
- int regionServerCount = 2
- createMiniCluster(clustername, configuration, 1, 1, 1, true, true)
+ int regionServerCount = 1
+ String clustername = createMiniCluster(testName, configuration, 1, 1, 1, true, true)
describe(
- "Create a single region service cluster then " + action + " the RS");
+ "Create a single region service HBase instance" +
+ "then $action the RS $killAttempts times with a threshold of $threshold");
//now launch the cluster
+ def globalThreshold = threshold - 1
ServiceLauncher<SliderClient> launcher = createHBaseCluster(
clustername,
regionServerCount,
[
- Arguments.ARG_OPTION, OptionKeys.INTERNAL_CONTAINER_FAILURE_THRESHOLD,
- Integer.toString(threshold)
+ Arguments.ARG_RES_COMP_OPT,
+ ROLE_WORKER,
+ ResourceKeys.CONTAINER_FAILURE_THRESHOLD,
+ Integer.toString(threshold),
+
+ Arguments.ARG_RESOURCE_OPT,
+ ResourceKeys.CONTAINER_FAILURE_THRESHOLD,
+ Integer.toString(globalThreshold)
],
true,
true)
SliderClient client = launcher.service
addToTeardown(client);
+ def aggregateConf = client.loadPersistedClusterDescription(clustername)
+ log.info aggregateConf.toString()
+
+ def resourceOperations = aggregateConf.resourceOperations
+ def failureOptValue = resourceOperations.globalOptions.getMandatoryOptionInt(
+ ResourceKeys.CONTAINER_FAILURE_THRESHOLD)
+ assert globalThreshold == failureOptValue
+ def workerThreshold = resourceOperations.getComponentOptInt(ROLE_WORKER,
+ ResourceKeys.CONTAINER_FAILURE_THRESHOLD, 0)
+ assert threshold == workerThreshold
ClusterDescription status = client.getClusterDescription(clustername)
ClusterStatus clustat = basicHBaseClusterStartupSequence(client)
@@ -109,7 +133,7 @@
describe("waiting for recovery")
//and expect a recovery
- if (restarts < threshold) {
+ if (restarts <= threshold) {
def restartTime = 1000
status = waitForWorkerInstanceCount(
@@ -125,20 +149,30 @@
//expect the cluster to have failed
try {
def finalCD = client.getClusterDescription(clustername)
- dumpClusterDescription("expected the AM to have failed", finalCD)
+ describe( "failure threshold ignored")
+ dumpClusterDescription("expected the cluster to have failed", finalCD)
+ describe "stopping cluster"
+ maybeStopCluster(
+ client,
+ "",
+ "stopping cluster that isn't failing correctly")
+
+
fail("AM had not failed after $restarts worker kills")
} catch (BadClusterStateException e) {
- assert e.toString().contains(ErrorStrings.E_APPLICATION_NOT_RUNNING)
- assert e.exitCode == SliderExitCodes.EXIT_BAD_STATE
+ assertExceptionDetails(e,
+ SliderExitCodes.EXIT_BAD_STATE,
+ ErrorStrings.E_APPLICATION_NOT_RUNNING)
//success
break;
}
}
}
} catch (BadClusterStateException e) {
- assert e.toString().contains(ErrorStrings.E_APPLICATION_NOT_RUNNING)
- assert e.exitCode == SliderExitCodes.EXIT_BAD_STATE
+ assertExceptionDetails(e,
+ SliderExitCodes.EXIT_BAD_STATE,
+ ErrorStrings.E_APPLICATION_NOT_RUNNING)
}
ApplicationReport report = client.applicationReport
log.info(report.diagnostics)
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlex0To1.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlex0To1.groovy
index 3a66873..ec8d264 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlex0To1.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlex0To1.groovy
@@ -30,7 +30,7 @@
@Test
public void testClusterFlex0To1() throws Throwable {
- assert flexHBaseClusterTestRun("test_cluster_flex_0to1", 1, 1, 0, 1, false)
+ assert flexHBaseClusterTestRun("", 1, 1, 0, 1, false)
}
}
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlex1To1.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlex1To1.groovy
index ba1bb94..fdbbce8 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlex1To1.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlex1To1.groovy
@@ -32,9 +32,9 @@
class TestClusterFlex1To1 extends HBaseMiniClusterTestBase {
@Test
- public void testClusterFlexPersistent() throws Throwable {
+ public void testClusterFlex1To1() throws Throwable {
assert !flexHBaseClusterTestRun(
- "test_cluster_flex_1to1",
+ "",
1,
1,
1,
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlex1To2.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlex1To2.groovy
index fd8e1ae..be38c3d 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlex1To2.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlex1To2.groovy
@@ -31,9 +31,9 @@
class TestClusterFlex1To2 extends HBaseMiniClusterTestBase {
@Test
- public void testClusterFlex() throws Throwable {
+ public void testClusterFlex1To2() throws Throwable {
assert flexHBaseClusterTestRun(
- "test_cluster_flex_1to2",
+ "",
1,
1,
1,
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlex2DownTo1.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlex2DownTo1.groovy
index c76a9d3..e36e067 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlex2DownTo1.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlex2DownTo1.groovy
@@ -34,7 +34,7 @@
@Test
public void testClusterFlex2DownTo1() throws Throwable {
assert flexHBaseClusterTestRun(
- "test_cluster_flex_2_down_to_1",
+ "",
1, 1,
2,
1,
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlex2To5.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlex2To5.groovy
index 39c1cac..683c02a 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlex2To5.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlex2To5.groovy
@@ -31,9 +31,9 @@
class TestClusterFlex2To5 extends HBaseMiniClusterTestBase {
@Test
- public void testClusterFlex() throws Throwable {
+ public void testClusterFlex2To5() throws Throwable {
assert flexHBaseClusterTestRun(
- "test_cluster_flex_2to5",
+ "",
1,
1,
2,
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestHMasterFlex1To2.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlexDownMultiple.groovy
similarity index 72%
copy from slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestHMasterFlex1To2.groovy
copy to slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlexDownMultiple.groovy
index 0910c6e..97a9b35 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestHMasterFlex1To2.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlexDownMultiple.groovy
@@ -20,6 +20,7 @@
import groovy.transform.CompileStatic
import groovy.util.logging.Slf4j
+import org.apache.slider.client.SliderClient
import org.apache.slider.providers.hbase.minicluster.HBaseMiniClusterTestBase
import org.junit.Test
@@ -28,17 +29,29 @@
*/
@CompileStatic
@Slf4j
-class TestHMasterFlex1To2 extends HBaseMiniClusterTestBase {
+class TestClusterFlexDownMultiple extends HBaseMiniClusterTestBase {
@Test
- public void testClusterFlex() throws Throwable {
- assert flexHBaseClusterTestRun(
- "test_hmaster_flex_1to2",
+ public void testClusterFlexDownMultiple() throws Throwable {
+ def clusterName = createClusterName();
+ SliderClient sliderClient = startHBaseCluster(clusterName, 1, 3)
+
+ assert flexCluster(
+ sliderClient,
+ clusterName,
1,
2,
+ true)
+
+ assert flexCluster(
+ sliderClient,
+ clusterName,
1,
1,
true)
+
+
+
}
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlexDownToZero.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlexDownToZero.groovy
index 5b392e8..c43c5bd 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlexDownToZero.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlexDownToZero.groovy
@@ -33,7 +33,7 @@
@Test
public void testClusterFlexDownToZero() throws Throwable {
assert flexHBaseClusterTestRun(
- "test_cluster_flex_down_to_zero",
+ "",
1,
1,
1,
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestHMasterFlex1To2.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlexHMasterFlex1To2.groovy
similarity index 89%
rename from slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestHMasterFlex1To2.groovy
rename to slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlexHMasterFlex1To2.groovy
index 0910c6e..298f40f 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestHMasterFlex1To2.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlexHMasterFlex1To2.groovy
@@ -28,12 +28,12 @@
*/
@CompileStatic
@Slf4j
-class TestHMasterFlex1To2 extends HBaseMiniClusterTestBase {
+class TestClusterFlexHMasterFlex1To2 extends HBaseMiniClusterTestBase {
@Test
- public void testClusterFlex() throws Throwable {
+ public void testClusterMasterFlex1To2() throws Throwable {
assert flexHBaseClusterTestRun(
- "test_hmaster_flex_1to2",
+ "",
1,
2,
1,
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/freezethaw/TestFreezeReconfigureThawLiveRegionService.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/freezethaw/TestFreezeReconfigureThawLiveRegionService.groovy
index 62e4d4b..f6748e0 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/freezethaw/TestFreezeReconfigureThawLiveRegionService.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/freezethaw/TestFreezeReconfigureThawLiveRegionService.groovy
@@ -25,9 +25,9 @@
import org.apache.hadoop.fs.Path
import org.apache.hadoop.hbase.ClusterStatus
import org.apache.hadoop.yarn.conf.YarnConfiguration
+import org.apache.slider.api.InternalKeys
import org.apache.slider.core.main.ServiceLauncher
import org.apache.slider.api.ClusterDescription
-import org.apache.slider.api.OptionKeys
import org.apache.slider.core.build.InstanceIO
import org.apache.slider.providers.hbase.HBaseKeys
import org.apache.slider.common.tools.ConfigHelper
@@ -47,13 +47,12 @@
@Test
public void testFreezeReconfigureThawLiveRegionService() throws Throwable {
- String clustername = "test_freeze_reconfigure_thaw_live_regionservice"
int regionServerCount = 4
int nodemanagers = 3
YarnConfiguration conf = configuration
//one vcore per node
conf.setInt("yarn.nodemanager.resource.cpu-vcores", 1)
- createMiniCluster(clustername, conf, nodemanagers, true)
+ String clustername = createMiniCluster("", conf, nodemanagers, true)
describe(
"Create a $regionServerCount node cluster, freeze it, patch the configuration files," +
" thaw it and verify that it came back with the new settings")
@@ -94,7 +93,7 @@
clusterDir)
def snapshotPath = instanceDefinition.internalOperations.get(
- OptionKeys.INTERNAL_SNAPSHOT_CONF_PATH)
+ InternalKeys.INTERNAL_SNAPSHOT_CONF_PATH)
assert snapshotPath != null
Path confdir = new Path(snapshotPath);
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/freezethaw/TestFreezeThawLiveRegionService.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/freezethaw/TestFreezeThawLiveRegionService.groovy
index f9d460e..66dd4f0 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/freezethaw/TestFreezeThawLiveRegionService.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/freezethaw/TestFreezeThawLiveRegionService.groovy
@@ -41,9 +41,8 @@
@Test
public void testFreezeThawLiveRegionService() throws Throwable {
- String clustername = "test_freeze_thaw_live_regionservice"
int regionServerCount = 2
- createMiniCluster(clustername, configuration, 1, true)
+ String clustername = createMiniCluster("", configuration, 1, true)
describe("Create a cluster, freeze it, thaw it and verify that it came back ")
//use a smaller AM HEAP to include it in the test cycle
ServiceLauncher launcher = createHBaseCluster(clustername, regionServerCount,
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/Test2Master2RS.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/Test2Master2RS.groovy
index c2652b8..47f5e1c 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/Test2Master2RS.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/Test2Master2RS.groovy
@@ -41,9 +41,8 @@
@Test
public void test2Master2RS() throws Throwable {
- String clustername = "test2master2rs"
int regionServerCount = 2
- createMiniCluster(clustername, configuration, 1, 1, 1, true, false)
+ String clustername = createMiniCluster("", configuration, 1, 1, 1, true, false)
describe(" Create a two master, two region service cluster");
//now launch the cluster
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestHBaseMaster.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestHBaseMaster.groovy
index 97714d6..634ebe2 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestHBaseMaster.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestHBaseMaster.groovy
@@ -46,8 +46,7 @@
@Test
public void testHBaseMaster() throws Throwable {
- String clustername = "test_hbase_master"
- createMiniCluster(clustername, configuration, 1, true)
+ String clustername = createMiniCluster("", configuration, 1, true)
//make sure that ZK is up and running at the binding string
ZKIntegration zki = createZKIntegrationInstance(ZKBinding, clustername, false, false, 5000)
//now launch the cluster with 1 region server
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestHBaseMasterOnHDFS.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestHBaseMasterOnHDFS.groovy
index fffd6c0..7f51f95 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestHBaseMasterOnHDFS.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestHBaseMasterOnHDFS.groovy
@@ -35,8 +35,8 @@
@Test
public void testHBaseMasteOnHDFS() throws Throwable {
- String clustername = "test_hbase_master_on_hdfs"
- createMiniCluster(clustername, configuration, 1, 1, 1, true, true)
+ String clustername = createMiniCluster(
+ "", configuration, 1, 1, 1, true, true)
log.info("HDFS is at $fsDefaultName")
assert fsDefaultName.startsWith("hdfs://")
ServiceLauncher<SliderClient> launcher = createHBaseCluster(clustername, 1, [], true, true)
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestHBaseMasterWithBadHeap.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestHBaseMasterWithBadHeap.groovy
index 9ca5f45..aeb3c47 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestHBaseMasterWithBadHeap.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestHBaseMasterWithBadHeap.groovy
@@ -42,8 +42,7 @@
@Test
public void testHBaseMasterWithBadHeap() throws Throwable {
- String clustername = "test_hbase_master_with_bad_heap"
- createMiniCluster(clustername, configuration, 1, true)
+ String clustername = createMiniCluster("", configuration, 1, true)
describe "verify that bad Java heap options are picked up"
//now launch the cluster with 1 region server
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestLiveRegionServiceOnHDFS.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestLiveRegionServiceOnHDFS.groovy
index 26292fb..957d167 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestLiveRegionServiceOnHDFS.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestLiveRegionServiceOnHDFS.groovy
@@ -37,9 +37,9 @@
@Test
public void testLiveRegionServiceOnHDFS() throws Throwable {
- String clustername = "test_live_region_service_on_hdfs"
int regionServerCount = 1
- createMiniCluster(clustername, configuration, 1, 1, 1, true, true)
+ String clustername = createMiniCluster(
+ "", configuration, 1, 1, 1, true, true)
describe(" Create a single region service cluster");
//make sure that ZK is up and running at the binding string
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestLiveTwoNodeRegionService.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestLiveTwoNodeRegionService.groovy
index 9b4b8a7..3561d2f 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestLiveTwoNodeRegionService.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestLiveTwoNodeRegionService.groovy
@@ -41,9 +41,9 @@
@Test
public void testLiveTwoNodeRegionService() throws Throwable {
- String clustername = "test_live_two_node_regionservice"
int regionServerCount = 2
- createMiniCluster(clustername, configuration, 1, 1, 1, true, false)
+ String clustername = createMiniCluster(
+ "", configuration, 1, 1, 1, true, false)
describe(" Create a two node region service cluster");
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestTwoLiveClusters.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestTwoLiveClusters.groovy
index 0140030..7e4c5ed 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestTwoLiveClusters.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestTwoLiveClusters.groovy
@@ -42,10 +42,9 @@
*/
@Test
public void testTwoLiveClusters() throws Throwable {
- def name = "test_two_live_clusters"
- createMiniCluster(name, configuration, 1, true)
+ String clustername = createMiniCluster("", configuration, 1, true)
- String clustername1 = name + "-1"
+ String clustername1 = clustername + "-1"
//now launch the cluster
int regionServerCount = 1
ServiceLauncher<SliderClient> launcher = createHBaseCluster(clustername1, regionServerCount, [], true, true)
@@ -62,7 +61,7 @@
waitForHBaseRegionServerCount(sliderClient, clustername1, 1, hbaseClusterStartupToLiveTime)
//now here comes cluster #2
- String clustername2 = name + "-2"
+ String clustername2 = clustername + "-2"
String zkpath = "/$clustername2"
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/masterless/TestRoleOptPropagation.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/masterless/TestRoleOptPropagation.groovy
index c44de16..cffde39 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/masterless/TestRoleOptPropagation.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/masterless/TestRoleOptPropagation.groovy
@@ -42,8 +42,7 @@
public void testRoleOptPropagation() throws Throwable {
skip("Disabled")
- String clustername = "test_role_opt_propagation"
- createMiniCluster(clustername, getConfiguration(), 1, true)
+ String clustername = createMiniCluster("", getConfiguration(), 1, true)
describe "verify that role options propagate down to deployed roles"
@@ -71,8 +70,7 @@
@Test
public void testUnknownRole() throws Throwable {
- String clustername = "test_unknown_role"
- createMiniCluster(clustername, getConfiguration(), 1, true)
+ String clustername = createMiniCluster("", configuration, 1, true)
describe "verify that unknown role results in cluster creation failure"
try {
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/masterless/TestSliderConfDirToMasterlessAM.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/masterless/TestSliderConfDirToMasterlessAM.groovy
index 2cdf2bb..c5e0282 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/masterless/TestSliderConfDirToMasterlessAM.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/masterless/TestSliderConfDirToMasterlessAM.groovy
@@ -46,9 +46,8 @@
@Test
public void testSliderConfDirToMasterlessAM() throws Throwable {
- String clustername = "test_slider_conf_dir_to_masterless_am"
YarnConfiguration conf = configuration
- createMiniCluster(clustername, conf, 1, true)
+ String clustername = createMiniCluster("", conf, 1, true)
describe "verify that a conf dir will propagate via the sytem proerpty"
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/resources/log4j.properties b/slider-providers/hbase/slider-hbase-provider/src/test/resources/log4j.properties
index a552a55..8f633b2 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/resources/log4j.properties
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/resources/log4j.properties
@@ -42,6 +42,7 @@
log4j.logger.org.apache.hadoop.hdfs.server.blockmanagement=WARN
log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=WARN
log4j.logger.org.apache.hadoop.hdfs=WARN
+log4j.logger.BlockStateChange=WARN
log4j.logger.org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor=WARN
@@ -57,3 +58,4 @@
log4j.logger.org.apache.hadoop.yarn.util.AbstractLivelinessMonitor=WARN
log4j.logger.org.apache.hadoop.yarn.server.nodemanager.security=WARN
log4j.logger.org.apache.hadoop.yarn.server.resourcemanager.RMNMInfo=WARN
+log4j.logger.org.apache.hadoop.hbase.client.HConnectionManager=WARN
\ No newline at end of file
diff --git a/src/test/clusters/c6401/slider/log4j.properties b/src/test/clusters/c6401/slider/log4j.properties
index d814f14..4682a96 100644
--- a/src/test/clusters/c6401/slider/log4j.properties
+++ b/src/test/clusters/c6401/slider/log4j.properties
@@ -1,15 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License. See accompanying LICENSE file.
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
#
# log4j configuration used during build and unit tests
diff --git a/src/test/clusters/morzine/slider/log4j.properties b/src/test/clusters/morzine/slider/log4j.properties
index d814f14..4682a96 100644
--- a/src/test/clusters/morzine/slider/log4j.properties
+++ b/src/test/clusters/morzine/slider/log4j.properties
@@ -1,15 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License. See accompanying LICENSE file.
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
#
# log4j configuration used during build and unit tests
diff --git a/src/test/clusters/offline/slider/log4j.properties b/src/test/clusters/offline/slider/log4j.properties
index d814f14..4682a96 100644
--- a/src/test/clusters/offline/slider/log4j.properties
+++ b/src/test/clusters/offline/slider/log4j.properties
@@ -1,15 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License. See accompanying LICENSE file.
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
#
# log4j configuration used during build and unit tests
diff --git a/src/test/clusters/offline/slider/slider-client.xml b/src/test/clusters/offline/slider/slider-client.xml
index 8385086..25c01cf 100644
--- a/src/test/clusters/offline/slider/slider-client.xml
+++ b/src/test/clusters/offline/slider/slider-client.xml
@@ -38,11 +38,6 @@
</property>
<property>
- <name>slider.funtest.enabled</name>
- <value>false</value>
- </property>
-
- <property>
<name>yarn.application.classpath</name>
<value>
/etc/hadoop/conf,/usr/lib/hadoop/*,/usr/lib/hadoop/lib/*,/usr/lib/hadoop-hdfs/*,/usr/lib/hadoop-hdfs/lib/*,/usr/lib/hadoop-yarn/*,/usr/lib/hadoop-yarn/lib/*,/usr/lib/hadoop-mapreduce/*,/usr/lib/hadoop-mapreduce/lib/*
@@ -62,12 +57,6 @@
</property>
<property>
- <name>slider.test.zkhosts</name>
- <description>list of the zookeeper hosts</description>
- <value></value>
- </property>
-
- <property>
<name>slider.test.accumulo.enabled</name>
<description>Flag to enable/disable Accumulo tests</description>
<value>false</value>
diff --git a/src/test/clusters/remote/slider/log4j.properties b/src/test/clusters/remote/slider/log4j.properties
index f672472..5b59190 100644
--- a/src/test/clusters/remote/slider/log4j.properties
+++ b/src/test/clusters/remote/slider/log4j.properties
@@ -1,15 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License. See accompanying LICENSE file.
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
#
# log4j configuration used during build and unit tests
diff --git a/src/test/clusters/sandbox/slider/log4j.properties b/src/test/clusters/sandbox/slider/log4j.properties
index d814f14..4682a96 100644
--- a/src/test/clusters/sandbox/slider/log4j.properties
+++ b/src/test/clusters/sandbox/slider/log4j.properties
@@ -1,15 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License. See accompanying LICENSE file.
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
#
# log4j configuration used during build and unit tests
diff --git a/src/test/clusters/sandbox/slider/slider-client.xml b/src/test/clusters/sandbox/slider/slider-client.xml
index 30937ec..5ac5d59 100644
--- a/src/test/clusters/sandbox/slider/slider-client.xml
+++ b/src/test/clusters/sandbox/slider/slider-client.xml
@@ -52,7 +52,7 @@
<property>
<name>slider.test.agent.enabled</name>
<description>Flag to enable/disable Agent tests</description>
- <value>false</value>
+ <value>true</value>
</property>