Merge pull request #231 from mikewalch/checksums

Created checksums file and updated for Accumulo 2.x & Hadoop 3.x
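
The new conf/checksums file replaces the per-software *_sha256 properties
in muchos.props. Each line maps <software>:<version> to its SHA-256 hash
(blank lines and lines starting with '#' are ignored), for example:

    accumulo:1.9.2:c23c147e6abde5e6b851cf27f91b813705dc41d07c2bfea798a86abb144255d5
    hadoop:3.1.1:f837fe260587f71629aad1f4fb6719274e948111dc96ffc5a8e26f27deac5602

DeployConfig.checksum() looks up the hash for each configured version at
deploy time and passes it to Ansible as the corresponding *_sha256 play var.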
diff --git a/ansible/roles/accumulo/tasks/init-accumulo.yml b/ansible/roles/accumulo/tasks/init-accumulo.yml
index 9e45215..b2ee10b 100644
--- a/ansible/roles/accumulo/tasks/init-accumulo.yml
+++ b/ansible/roles/accumulo/tasks/init-accumulo.yml
@@ -1,5 +1,5 @@
 - name: "determine if accumulo needs to be initialized"
-  command: "{{ hadoop_prefix }}/bin/hdfs dfs -stat /accumulo"
+  command: "{{ hadoop_home }}/bin/hdfs dfs -stat /accumulo"
   register: accumulo_stat
   changed_when: accumulo_stat.rc != 0
   failed_when: accumulo_stat.rc != 0 and 'No such file or directory' not in accumulo_stat.stderr
diff --git a/ansible/roles/accumulo/tasks/main.yml b/ansible/roles/accumulo/tasks/main.yml
index e5e64de..811dc05 100644
--- a/ansible/roles/accumulo/tasks/main.yml
+++ b/ansible/roles/accumulo/tasks/main.yml
@@ -8,7 +8,18 @@
     - monitor_logger.xml
     - log4j.properties
   when: accumulo_major_version == '1'
-- name: "configure accumulo using managed templates"
+- name: "configure accumulo 2.0 configuration"
+  template: src={{ item }} dest={{ accumulo_home }}/conf/{{ item }}
+  with_items:
+    - accumulo-env.sh
+    - accumulo.properties
+    - accumulo-client.properties
+    - gc
+    - tracers
+    - masters
+    - monitor
+  when: accumulo_major_version == '2'
+- name: "configure accumulo 1.0 configuration"
   template: src={{ item }} dest={{ accumulo_home }}/conf/{{ item }}
   with_items:
     - accumulo-env.sh
@@ -18,6 +29,7 @@
     - tracers
     - masters
     - monitor
+  when: accumulo_major_version == '1'
 - name: "configure accumulo to send metrics (if metrics server exists)"
   template: src={{ item }} dest={{ accumulo_home }}/conf/{{ item }}
   with_items:
diff --git a/ansible/roles/accumulo/templates/accumulo-client.properties b/ansible/roles/accumulo/templates/accumulo-client.properties
new file mode 100644
index 0000000..985b259
--- /dev/null
+++ b/ansible/roles/accumulo/templates/accumulo-client.properties
@@ -0,0 +1,33 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+################################
+## Accumulo client configuration
+################################
+
+## Name of Accumulo instance to connect to
+instance.name={{ accumulo_instance }}
+
+## Zookeeper connection information for Accumulo instance
+instance.zookeepers={{ zookeeper_connect }}
+
+## Authentication method (e.g. password, kerberos, PasswordToken, KerberosToken, etc.)
+auth.type=password
+
+## Accumulo principal/username for chosen authentication method
+auth.principal=root
+
+## Authentication token (e.g. mypassword, /path/to/keytab)
+auth.token={{ accumulo_password }}
diff --git a/ansible/roles/accumulo/templates/accumulo-env.sh b/ansible/roles/accumulo/templates/accumulo-env.sh
index ca64e7f..d63188f 100755
--- a/ansible/roles/accumulo/templates/accumulo-env.sh
+++ b/ansible/roles/accumulo/templates/accumulo-env.sh
@@ -16,13 +16,13 @@
 # limitations under the License.
 
 export ACCUMULO_LOG_DIR={{ worker_data_dirs[0] }}/logs/accumulo
-export HADOOP_PREFIX={{ hadoop_prefix }}
-export HADOOP_CONF_DIR="$HADOOP_PREFIX/etc/hadoop"
 export ZOOKEEPER_HOME={{ zookeeper_home }}
 export JAVA_HOME={{ java_home }}
 
 {% if accumulo_major_version == '1' %}
 
+export HADOOP_PREFIX={{ hadoop_home }}
+export HADOOP_CONF_DIR="$HADOOP_PREFIX/etc/hadoop"
 export ACCUMULO_TSERVER_OPTS="-Xmx{{ accumulo_tserv_mem }} -Xms{{ accumulo_tserv_mem }}"
 export ACCUMULO_MASTER_OPTS="-Xmx256m -Xms256m"
 export ACCUMULO_MONITOR_OPTS="-Xmx128m -Xms64m"
@@ -32,17 +32,14 @@
 export ACCUMULO_OTHER_OPTS="-Xmx256m -Xms64m"
 export ACCUMULO_KILL_CMD='kill -9 %p'
 export NUM_TSERVERS=1
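+# Cap glibc malloc arenas to limit native memory usage of the JVM processes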
+export MALLOC_ARENA_MAX=${MALLOC_ARENA_MAX:-1}
 
 {% else %}
 
-CLASSPATH="$(find "$ZOOKEEPER_HOME"/ "$HADOOP_PREFIX"/share/hadoop/{common,common/lib,hdfs,mapreduce,yarn} -maxdepth 1 -name '*.jar' \
-  -and -not -name '*slf4j*' \
-  -and -not -name '*fatjar*' \
-  -and -not -name '*-javadoc*' \
-  -and -not -name '*-sources*.jar' \
-  -and -not -name '*-test*.jar' \
-  -print0 | tr '\0' ':')$CLASSPATH"
-CLASSPATH="${conf}:${lib}/*:${HADOOP_CONF_DIR}:${CLASSPATH}"
+export HADOOP_HOME={{ hadoop_home }}
+export HADOOP_CONF_DIR="$HADOOP_HOME/etc/hadoop"
+
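+# Build the 2.x classpath from Hadoop's bundled client jars rather than scanning share/hadoop subdirectories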
+CLASSPATH="${conf}:${lib}/*:${HADOOP_CONF_DIR}:${ZOOKEEPER_HOME}/*:${HADOOP_HOME}/share/hadoop/client/*"
 export CLASSPATH
 
 JAVA_OPTS=("${ACCUMULO_JAVA_OPTS[@]}"
@@ -80,7 +77,6 @@
 esac
 export JAVA_OPTS
 
+export MALLOC_ARENA_MAX=${MALLOC_ARENA_MAX:-1}
 export LD_LIBRARY_PATH="${HADOOP_PREFIX}/lib/native:${LD_LIBRARY_PATH}"
 {% endif %}
-
-export MALLOC_ARENA_MAX=${MALLOC_ARENA_MAX:-1}
diff --git a/ansible/roles/accumulo/templates/accumulo-site.xml b/ansible/roles/accumulo/templates/accumulo-site.xml
index 5e79942..8dec8b5 100644
--- a/ansible/roles/accumulo/templates/accumulo-site.xml
+++ b/ansible/roles/accumulo/templates/accumulo-site.xml
@@ -40,8 +40,6 @@
     <name>tserver.walog.max.size</name>
     <value>512M</value>
   </property>
-
-{% if accumulo_major_version == '1' %}
   <property>
     <name>tserver.cache.data.size</name>
     <value>{{ accumulo_dcache_size }}</value>
@@ -81,5 +79,4 @@
       $HADOOP_PREFIX/share/hadoop/yarn/lib/jersey.*.jar
     </value>
   </property>
-{% endif %}
 </configuration>
diff --git a/ansible/roles/accumulo/templates/accumulo.properties b/ansible/roles/accumulo/templates/accumulo.properties
new file mode 100644
index 0000000..44edeca
--- /dev/null
+++ b/ansible/roles/accumulo/templates/accumulo.properties
@@ -0,0 +1,42 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This is the main configuration file for Apache Accumulo. Available configuration properties can be
+# found in the Accumulo documentation on the Accumulo project website (https://accumulo.apache.org/)
+# Link for Accumulo 2.0: https://accumulo.apache.org/docs/2.0/administration/properties
+
+## Time to wait on I/O for simple, short RPC calls
+general.rpc.timeout=240s
+
+## A secret unique to a given instance that servers must know in order to communicate
+instance.secret=muchos
+
+## Sets location in HDFS where Accumulo will store data
+instance.volumes={{ hdfs_root }}/accumulo
+
+## Sets location of Zookeepers
+instance.zookeeper.host={{ zookeeper_connect }}
+
+## The durability used to write the write-ahead log
+table.durability=flush
+
+## Enables the C++ in-memory data store, which limits Java GC pauses
+tserver.memory.maps.native.enabled=true
+
+## Minimum number of threads to use to handle incoming requests
+tserver.server.threads.minimum=64
+
+## The maximum size for each write-ahead log
+tserver.walog.max.size=512M
diff --git a/ansible/roles/common/templates/bash_profile b/ansible/roles/common/templates/bash_profile
index b180a6c..af79087 100644
--- a/ansible/roles/common/templates/bash_profile
+++ b/ansible/roles/common/templates/bash_profile
@@ -12,8 +12,8 @@
 PATH=$PATH:{{ accumulo_home }}/bin
 PATH=$PATH:{{ fluo_home }}/bin
 PATH=$PATH:{{ fluo_yarn_home }}/bin
-PATH=$PATH:{{ hadoop_prefix }}/bin
-PATH=$PATH:{{ hadoop_prefix }}/sbin
+PATH=$PATH:{{ hadoop_home }}/bin
+PATH=$PATH:{{ hadoop_home }}/sbin
 PATH=$PATH:{{ hub_home }}/bin
 PATH=$PATH:{{ maven_home }}/bin
 PATH=$PATH:{{ spark_home }}/bin
diff --git a/ansible/roles/common/templates/bashrc b/ansible/roles/common/templates/bashrc
index 425ebb0..fd6b387 100644
--- a/ansible/roles/common/templates/bashrc
+++ b/ansible/roles/common/templates/bashrc
@@ -6,9 +6,15 @@
 fi
 
 export JAVA_HOME={{ java_home }}
-export HADOOP_PREFIX={{ hadoop_prefix }}
-export HADOOP_HOME=$HADOOP_PREFIX
+
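+# Hadoop 3.x deprecated HADOOP_PREFIX in favor of HADOOP_HOME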
+{% if hadoop_major_version == '2' %}
+export HADOOP_PREFIX={{ hadoop_home }}
 export HADOOP_CONF_DIR=$HADOOP_PREFIX/etc/hadoop
+{% else %}
+export HADOOP_HOME={{ hadoop_home }}
+export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
+{% endif %}
+
 export ZOOKEEPER_HOME={{ zookeeper_home }}
 export SPARK_HOME={{ spark_home }}
 export ACCUMULO_HOME={{ accumulo_home }}
@@ -16,7 +22,7 @@
 export FLUO_YARN_HOME={{ fluo_yarn_home }}
 
 alias ssh='ssh -A'
-alias cdh='cd {{ hadoop_prefix }}'
+alias cdh='cd {{ hadoop_home }}'
 alias cdz='cd {{ zookeeper_home }}'
 alias cda='cd {{ accumulo_home }}'
 alias cdf='cd {{ fluo_home }}'
diff --git a/ansible/roles/common/templates/root_bashrc b/ansible/roles/common/templates/root_bashrc
index 500851a..0e5c2eb 100644
--- a/ansible/roles/common/templates/root_bashrc
+++ b/ansible/roles/common/templates/root_bashrc
@@ -7,9 +7,15 @@
 
 # User specific aliases and functions
 export JAVA_HOME={{ java_home }}
-export HADOOP_PREFIX={{ hadoop_prefix }}
-export HADOOP_HOME=$HADOOP_PREFIX
+
+{% if hadoop_major_version == '2' %}
+export HADOOP_PREFIX={{ hadoop_home }}
 export HADOOP_CONF_DIR=$HADOOP_PREFIX/etc/hadoop
+{% else %}
+export HADOOP_HOME={{ hadoop_home }}
+export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
+{% endif %}
+
 export ZOOKEEPER_HOME={{ zookeeper_home }}
 export SPARK_HOME={{ spark_home }}
 export ACCUMULO_HOME={{ accumulo_home }}
@@ -18,8 +24,8 @@
 PATH=$JAVA_HOME/bin:$PATH
 PATH=$PATH:{{ accumulo_home }}/bin
 PATH=$PATH:{{ fluo_home }}/bin
-PATH=$PATH:{{ hadoop_prefix }}/bin
-PATH=$PATH:{{ hadoop_prefix }}/sbin
+PATH=$PATH:{{ hadoop_home }}/bin
+PATH=$PATH:{{ hadoop_home }}/sbin
 PATH=$PATH:{{ hub_home }}/bin
 PATH=$PATH:{{ maven_home }}/bin
 PATH=$PATH:{{ spark_home }}/bin
diff --git a/ansible/roles/hadoop/tasks/main.yml b/ansible/roles/hadoop/tasks/main.yml
index e091d62..fdb63e5 100644
--- a/ansible/roles/hadoop/tasks/main.yml
+++ b/ansible/roles/hadoop/tasks/main.yml
@@ -1,7 +1,7 @@
 - name: "install hadoop tarball"
-  unarchive: src={{ tarballs_dir }}/{{ hadoop_tarball }} dest={{ install_dir }} creates={{ hadoop_prefix }} copy=yes
+  unarchive: src={{ tarballs_dir }}/{{ hadoop_tarball }} dest={{ install_dir }} creates={{ hadoop_home }} copy=yes
 - name: "configure hadoop with templates"
-  template: src={{ item }} dest={{ hadoop_prefix }}/etc/hadoop/{{ item }}
+  template: src={{ item }} dest={{ hadoop_home }}/etc/hadoop/{{ item }}
   with_items:
     - core-site.xml
     - hdfs-site.xml
@@ -9,14 +9,14 @@
     - mapred-site.xml
     - slaves
 - name: "copy spark yarn shuffle jar to hadoop lib"
-  command: cp {{ spark_home }}/yarn/spark-{{ spark_version }}-yarn-shuffle.jar {{ hadoop_prefix }}/share/hadoop/yarn/lib/ creates={{ hadoop_prefix }}/share/hadoop/yarn/lib/spark-{{ spark_version }}-yarn-shuffle.jar
+  command: cp {{ spark_home }}/yarn/spark-{{ spark_version }}-yarn-shuffle.jar {{ hadoop_home }}/share/hadoop/yarn/lib/ creates={{ hadoop_home }}/share/hadoop/yarn/lib/spark-{{ spark_version }}-yarn-shuffle.jar
   when: "'spark' in groups"
 - name: "setup hadoop short circuit socket dir"
   file: path=/var/lib/hadoop-hdfs state=directory owner={{ cluster_user }} group={{ cluster_user }} mode=0755
   become: yes
 - name: "Configure hadoop log dir"
   replace:
-    path: "{{ hadoop_prefix }}/etc/hadoop/hadoop-env.sh"
+    path: "{{ hadoop_home }}/etc/hadoop/hadoop-env.sh"
     regexp: '.*export\s+HADOOP_LOG_DIR.*'
     replace: "export HADOOP_LOG_DIR={{ worker_data_dirs[0] }}/logs/hadoop"
 - name: "Create hadoop log dir"
diff --git a/ansible/roles/hadoop/tasks/start-hdfs.yml b/ansible/roles/hadoop/tasks/start-hdfs.yml
index d0718f9..bb58e52 100644
--- a/ansible/roles/hadoop/tasks/start-hdfs.yml
+++ b/ansible/roles/hadoop/tasks/start-hdfs.yml
@@ -1,8 +1,8 @@
 - name: "format namenode"
-  command: "{{ hadoop_prefix }}/bin/hdfs namenode -format"
+  command: "{{ hadoop_home }}/bin/hdfs namenode -format"
   args:
     creates: "{{ worker_data_dirs[0] }}/hadoop/name"
 - name: "start hdfs"
-  command: "{{ hadoop_prefix }}/sbin/start-dfs.sh"
+  command: "{{ hadoop_home }}/sbin/start-dfs.sh"
   register: start_hdfs
   changed_when: "': starting' in start_hdfs.stdout"
diff --git a/ansible/roles/hadoop/tasks/start-yarn.yml b/ansible/roles/hadoop/tasks/start-yarn.yml
index 978fb0f..7477e87 100644
--- a/ansible/roles/hadoop/tasks/start-yarn.yml
+++ b/ansible/roles/hadoop/tasks/start-yarn.yml
@@ -1,4 +1,4 @@
 - name: "start yarn"
-  command: "{{ hadoop_prefix }}/sbin/start-yarn.sh"
+  command: "{{ hadoop_home }}/sbin/start-yarn.sh"
   register: start_yarn_result
   changed_when: start_yarn_result.stdout | search("starting (:?resource|node)manager")
diff --git a/ansible/roles/mesos/templates/mesos-slave b/ansible/roles/mesos/templates/mesos-slave
index a5e3390..9003eb0 100644
--- a/ansible/roles/mesos/templates/mesos-slave
+++ b/ansible/roles/mesos/templates/mesos-slave
@@ -1,3 +1,3 @@
 MASTER=`cat /etc/mesos/zk`
-MESOS_HADOOP_HOME={{ hadoop_prefix }}
+MESOS_HADOOP_HOME={{ hadoop_home }}
 JAVA_HOME={{ java_home }}
diff --git a/ansible/roles/spark/tasks/start-spark-history.yml b/ansible/roles/spark/tasks/start-spark-history.yml
index 85395ac..ecf67a3 100644
--- a/ansible/roles/spark/tasks/start-spark-history.yml
+++ b/ansible/roles/spark/tasks/start-spark-history.yml
@@ -1,5 +1,5 @@
 - name: "ensure spark history directory exists in hdfs" 
-  command: "{{ hadoop_prefix}}/bin/hdfs dfs -mkdir -p /spark/history"
+  command: "{{ hadoop_home}}/bin/hdfs dfs -mkdir -p /spark/history"
   register: mk_hist_dir
   changed_when: mk_hist_dir.rc != 0
 - name: "start spark history server"
diff --git a/ansible/roles/spark/templates/spark-env.sh b/ansible/roles/spark/templates/spark-env.sh
index 477f242..f190eb7 100755
--- a/ansible/roles/spark/templates/spark-env.sh
+++ b/ansible/roles/spark/templates/spark-env.sh
@@ -1,4 +1,4 @@
 #!/usr/bin/env bash
 
-export SPARK_DIST_CLASSPATH=$({{ hadoop_prefix }}/bin/hadoop classpath)
-export HADOOP_CONF_DIR={{ hadoop_prefix }}/etc/hadoop
+export SPARK_DIST_CLASSPATH=$({{ hadoop_home }}/bin/hadoop classpath)
+export HADOOP_CONF_DIR={{ hadoop_home }}/etc/hadoop
diff --git a/ansible/wipe.yml b/ansible/wipe.yml
index 763db89..b5d81e0 100644
--- a/ansible/wipe.yml
+++ b/ansible/wipe.yml
@@ -16,7 +16,7 @@
   - name: "wipe software installation dirs"
     file: path={{ item }} state=absent
     with_items:
-      - "{{ hadoop_prefix }}"
+      - "{{ hadoop_home }}"
       - "{{ zookeeper_home }}"
       - "{{ accumulo_home }}"
       - "{{ fluo_home }}"
@@ -27,7 +27,7 @@
     file: path={{item}}/hadoop state=absent
     with_items: "{{ worker_data_dirs }}"
   - name: "remove hadoop logs"
-    shell: rm -rf {{ hadoop_prefix }}/logs/*
+    shell: rm -rf {{ hadoop_home }}/logs/*
 - hosts: zookeepers
   tasks:
   - name: "wipe zookeeper data"
diff --git a/conf/checksums b/conf/checksums
new file mode 100644
index 0000000..af0c4aa
--- /dev/null
+++ b/conf/checksums
@@ -0,0 +1,20 @@
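+# SHA-256 checksums used to verify downloaded tarballs
+# Format: <software>:<version>:<sha256> (one entry per line; '#' lines are ignored)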
+accumulo:1.9.2:c23c147e6abde5e6b851cf27f91b813705dc41d07c2bfea798a86abb144255d5
+accumulo:1.9.0:f68a6145029a9ea843b0305c90a7f5f0334d8a8ceeea94734267ec36421fe7fe
+accumulo:1.8.1:eba3bfe823935ca7901ea7c2bd59c84a68b9381361699c7e260bbd9191f237f4
+accumulo:1.7.4:3776dddbc2a09f4a9d7a2ae4958e212e91eb5067a124a628330edbee4e32e754
+accumulo:1.7.3:294f2f1f3fbc164b68e80cecd5a6ce5c245df804fb35ae5e03ab1c86bc9480da
+fluo:1.2.0:037f89cd2bfdaf76a1368256c52de46d6b9a85c9c1bfc776ec4447d02c813fb2
+fluo_yarn:1.0.0:c6220d35cf23127272f3b5638c44586504dc17a46f5beecdfee5027b5ff874b0
+hadoop:3.1.1:f837fe260587f71629aad1f4fb6719274e948111dc96ffc5a8e26f27deac5602
+hadoop:3.0.2:0d507aa71007b2685e292343c11c2cb90a92ea7625446b57d1fb47c5721e2f82
+hadoop:2.9.0:8d48666f29f9ade6ed2762b7a9edab177bad2c57396f43d0ffd6a269d54f6fe1
+hadoop:2.8.4:6b545972fdd73173887cdbc3e1cbd3cc72068271924edea82a0e7e653199b115
+hadoop:2.8.3:e8bf9a53337b1dca3b152b0a5b5e277dc734e76520543e525c301a050bb27eae
+hadoop:2.7.6:f2327ea93f4bc5a5d7150dee8e0ede196d3a77ff8526a7dd05a48a09aae25669
+hadoop:2.7.5:0bfc4d9b04be919be2fdf36f67fa3b4526cdbd406c512a7a1f5f1b715661f831
+hadoop:2.6.5:001ad18d4b6d0fe542b15ddadba2d092bc97df1c4d2d797381c8d12887691898
+spark:2.2.2:023b2fea378b3dd0fee2d5d1de6bfaf2d8349aefe7be97a9cbcf03bbacc428d7
+zookeeper:3.4.13:7ced798e41d2027784b8fd55c908605ad5bd94a742d5dab2506be8f94770594d
+zookeeper:3.4.12:c686f9319050565b58e642149cb9e4c9cc8c7207aacc2cb70c5c0672849594b9
+zookeeper:3.4.11:f6bd68a1c8f7c13ea4c2c99f13082d0d71ac464ffaf3bf7a365879ab6ad10e84
+zookeeper:3.4.10:7f7f5414e044ac11fee2a1e0bc225469f51fb0cdf821e67df762a43098223f27
diff --git a/conf/muchos.props.example b/conf/muchos.props.example
index 64fc905..a98057b 100644
--- a/conf/muchos.props.example
+++ b/conf/muchos.props.example
@@ -33,20 +33,13 @@
 accumulo_instance = muchos
 # Accumluo Password
 accumulo_password = secret
-# Software versions
+# Software versions (the matching SHA-256 checksums must exist in conf/checksums)
 hadoop_version = 2.8.4
 zookeeper_version = 3.4.12
 spark_version = 2.2.2
 fluo_version = 1.2.0
 fluo_yarn_version = 1.0.0
 accumulo_version = 1.9.2
-# Software sha256 checksums
-hadoop_sha256 = 6b545972fdd73173887cdbc3e1cbd3cc72068271924edea82a0e7e653199b115
-zookeeper_sha256 = c686f9319050565b58e642149cb9e4c9cc8c7207aacc2cb70c5c0672849594b9
-spark_sha256 = 023b2fea378b3dd0fee2d5d1de6bfaf2d8349aefe7be97a9cbcf03bbacc428d7
-fluo_sha256 = 037f89cd2bfdaf76a1368256c52de46d6b9a85c9c1bfc776ec4447d02c813fb2
-fluo_yarn_sha256 = c6220d35cf23127272f3b5638c44586504dc17a46f5beecdfee5027b5ff874b0
-accumulo_sha256 = c23c147e6abde5e6b851cf27f91b813705dc41d07c2bfea798a86abb144255d5
 
 [ec2]
 # AWS machine image to use. The default below is for a CentOS 7 image (in us-east-1).
diff --git a/lib/muchos/config.py b/lib/muchos/config.py
index 4d643b1..2aa8996 100644
--- a/lib/muchos/config.py
+++ b/lib/muchos/config.py
@@ -22,7 +22,7 @@
 
 class DeployConfig(ConfigParser):
 
-    def __init__(self, deploy_path, config_path, hosts_path, cluster_name):
+    def __init__(self, deploy_path, config_path, hosts_path, checksums_path, cluster_name):
         ConfigParser.__init__(self)
         self.optionxform = str
         self.deploy_path = deploy_path
@@ -35,6 +35,8 @@
         self.metrics_drive_root = 'media-' + self.ephemeral_root
         self.node_d = None
         self.hosts = None
+        self.checksums_path = checksums_path
+        self.checksums_d = None
         self.init_nodes()
 
     def verify_config(self, action):
@@ -118,8 +120,30 @@
     def version(self, software_id):
         return self.get('general', software_id + '_version')
 
-    def sha256(self, software_id):
-        return self.get('general', software_id + '_sha256')
+    def checksum(self, software):
+        return self.checksum_ver(software, self.version(software))
+
+    def checksum_ver(self, software, version):
+        if not os.path.isfile(self.checksums_path):
+            exit('ERROR - A checksums file does not exist at %s' % self.checksums_path)
+
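+        # Lazily parse the checksums file into a {software:version -> sha256} dict on first lookup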
+        if not self.checksums_d:
+            self.checksums_d = {}
+            with open(self.checksums_path) as f:
+                for line in f:
+                    line = line.strip()
+                    if line.startswith("#") or not line:
+                        continue
+                    args = line.split(':')
+                    if len(args) == 3:
+                        self.checksums_d["{0}:{1}".format(args[0], args[1])] = args[2]
+                    else:
+                        exit('ERROR - Bad line %s in checksums %s' % (line, self.checksums_path))
+
+        key = "{0}:{1}".format(software, version)
+        if key not in self.checksums_d:
+            exit('ERROR - Failed to find checksum for {0} {1} in {2}'.format(software, version, self.checksums_path))
+        return self.checksums_d[key]
 
     def verify_instance_type(self, instance_type):
         if get_arch(instance_type) == 'pvm':
@@ -276,9 +302,10 @@
   'fluo_yarn_home': '"{{ install_dir }}/fluo-yarn-{{ fluo_yarn_version }}"',
   'fluo_yarn_tarball': 'fluo-yarn-{{ fluo_yarn_version }}-bin.tar.gz',
   'fluo_yarn_version': None,
-  'hadoop_prefix': '"{{ install_dir }}/hadoop-{{ hadoop_version }}"',
+  'hadoop_home': '"{{ install_dir }}/hadoop-{{ hadoop_version }}"',
   'hadoop_tarball': 'hadoop-{{ hadoop_version }}.tar.gz',
   'hadoop_version': None,
+  'hadoop_major_version': '"{{ hadoop_version.split(\'.\')[0] }}"',
   'hdfs_root': 'hdfs://{{ groups[\'namenode\'][0] }}:8020',
   'install_dir': '"{{ cluster_basedir }}/install"',
   'java_home': '"/usr/lib/jvm/java-1.8.0-openjdk"',
@@ -303,13 +330,13 @@
   'accumulo_imap_size': None,
   'accumulo_sha256': None,
   'accumulo_tserv_mem': None,
+  'fluo_sha256': None,
   'fluo_worker_instances_multiplier': None,
   'fluo_worker_mem_mb': None,
   'fluo_worker_threads': None,
+  'fluo_yarn_sha256': None,
   'force_format': None,
   'fstype': None,
-  'fluo_sha256': None,
-  'fluo_yarn_sha256': None,
   'hadoop_sha256': None,
   'hub_version': '2.2.3',
   'hub_home': '"{{ install_dir }}/hub-linux-amd64-{{ hub_version }}"',
diff --git a/lib/muchos/main.py b/lib/muchos/main.py
index 49bbb4a..0d8d0f4 100644
--- a/lib/muchos/main.py
+++ b/lib/muchos/main.py
@@ -200,6 +200,13 @@
                     if name in play_vars:
                         play_vars[name] = value
 
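+        # Resolve each software's sha256 from conf/checksums based on its configured version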
+        play_vars['accumulo_sha256'] = config.checksum('accumulo')
+        play_vars['fluo_sha256'] = config.checksum('fluo')
+        play_vars['fluo_yarn_sha256'] = config.checksum('fluo_yarn')
+        play_vars['hadoop_sha256'] = config.checksum('hadoop')
+        play_vars['spark_sha256'] = config.checksum('spark')
+        play_vars['zookeeper_sha256'] = config.checksum('zookeeper')
+
         cloud_provider = host_vars.get('cloud_provider', 'ec2')
         node_type_map = {}
         if cloud_provider == 'ec2':
@@ -427,6 +434,9 @@
     config_path = join(deploy_path, "conf/muchos.props")
     if not isfile(config_path):
         exit('ERROR - A config file does not exist at '+config_path)
+    checksums_path = join(deploy_path, "conf/checksums")
+    if not isfile(checksums_path):
+        exit('ERROR - A checksums file does not exist at '+checksums_path)
 
     hosts_dir = join(deploy_path, "conf/hosts/")
 
@@ -439,7 +449,7 @@
 
     hosts_path = join(hosts_dir, opts.cluster)
 
-    config = DeployConfig(deploy_path, config_path, hosts_path, opts.cluster)
+    config = DeployConfig(deploy_path, config_path, hosts_path, checksums_path, opts.cluster)
     config.verify_config(action)
 
     cluster = MuchosCluster(config)
diff --git a/lib/tests/test_config.py b/lib/tests/test_config.py
index 738036f..4502cd6 100644
--- a/lib/tests/test_config.py
+++ b/lib/tests/test_config.py
@@ -17,7 +17,9 @@
 
 def test_defaults():
     c = DeployConfig("muchos", '../conf/muchos.props.example', '../conf/hosts/example/example_cluster',
-                     'mycluster')
+                     '../conf/checksums', 'mycluster')
+    assert c.checksum_ver('accumulo', '1.9.0') == 'f68a6145029a9ea843b0305c90a7f5f0334d8a8ceeea94734267ec36421fe7fe'
+    assert c.checksum('accumulo') == 'c23c147e6abde5e6b851cf27f91b813705dc41d07c2bfea798a86abb144255d5'
     assert c.get('ec2', 'default_instance_type') == 'm5d.large'
     assert c.get('ec2', 'worker_instance_type') == 'm5d.large'
     assert c.get('ec2', 'aws_ami') == 'ami-9887c6e7'
@@ -61,7 +63,7 @@
 
 def test_case_sensitive():
     c = DeployConfig("muchos", '../conf/muchos.props.example', '../conf/hosts/example/example_cluster',
-                     'mycluster')
+                     '../conf/checksums', 'mycluster')
     assert c.has_option('ec2', 'default_instance_type') == True
     assert c.has_option('ec2', 'Default_instance_type') == False
     c.set('nodes', 'CamelCaseWorker', 'worker,fluo')