Merge pull request #93 from mikewalch/fd-90

Closes #90 #92 - Add system monitoring to Grafana/InfluxDB
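
This change drops the CloudWatch detailed-monitoring cron jobs and instead installs
collectd on every node when metrics are enabled.  collectd ships OS metrics (CPU,
memory, load, and disk) through its network plugin to the InfluxDB collectd listener
on the metrics node, and a new Grafana "Cluster" dashboard reads them from the
cluster_metrics data source.  As a rough sanity check (a sketch only: it assumes you
are on the metrics node, that InfluxDB's HTTP API is on its default port 8086, and
that authentication is disabled; otherwise add -u fluo:secret), the collectd-fed
measurements used by the dashboard (cpu_value, memory_value, load_shortterm,
df_value) should appear in:

    curl -G "http://localhost:8086/query" \
         --data-urlencode "db=cluster_metrics" \
         --data-urlencode "q=SHOW MEASUREMENTS"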
diff --git a/bin/impl/fluo_deploy/config.py b/bin/impl/fluo_deploy/config.py
index 7915f05..d558b4f 100644
--- a/bin/impl/fluo_deploy/config.py
+++ b/bin/impl/fluo_deploy/config.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 from ConfigParser import ConfigParser
-from util import get_num_ephemeral, exit, get_arch
+from util import get_num_ephemeral, exit, get_arch, get_ami
 import os
 from os.path import join
 
@@ -43,6 +43,9 @@
       self.proxy_public_ip()
 
     if action in ['launch', 'setup']:
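+      # Fail fast if either configured instance type would require a pvm AMI (no longer supported)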
+      self.get_image_id(self.default_instance_type())
+      self.get_image_id(self.worker_instance_type())
+
       for service in SERVICES:
         if service not in ['fluo', 'metrics', 'dev']:
           if not self.has_service(service):
@@ -182,25 +185,10 @@
   def aws_secret_key(self):
     return self.get('ec2', 'aws.secret.key')
 
-  def detailed_monitoring(self):
-    if self.get('ec2', 'detailed.monitoring') == "true":
-      return True
-    return False
-
-  def hvm_ami(self):
-    return self.get('ec2', 'hvm.ami')
-
-  def pvm_ami(self):
-    return self.get('ec2', 'pvm.ami')
-
   def get_image_id(self, instance_type):
-    arch = get_arch(instance_type)
-    if arch == "hvm":
-      return self.hvm_ami()
-    elif arch == "pvm":
-      return self.pvm_ami()
-    else:
-      return None
+    if get_arch(instance_type) == 'pvm':
+      exit("ERROR - Configuration contains instance type '{0}' that uses pvm architecture.  Only hvm architecture is supported!".format(instance_type))
+    return get_ami(instance_type, self.region())
 
   def region(self):
     return self.get('ec2', 'region')
diff --git a/bin/impl/fluo_deploy/main.py b/bin/impl/fluo_deploy/main.py
index aa13757..0b91ade 100644
--- a/bin/impl/fluo_deploy/main.py
+++ b/bin/impl/fluo_deploy/main.py
@@ -111,8 +111,7 @@
                               subnet_id=config.subnet_id(),
                               min_count=1,
                               max_count=1,
-                              block_device_map=bdm,
-                              monitoring_enabled=config.detailed_monitoring())
+                              block_device_map=bdm)
   
     if len(resv.instances) != 1:
       exit('ERROR - Failed to start {0} node'.format(hostname))
@@ -192,6 +191,9 @@
 def exec_fluo_cluster_command(config, command):
   exec_on_proxy_verified(config, "bash {base}/install/fluo-cluster/bin/fluo-cluster {command}".format(base=config.cluster_base_dir(), command=command))
 
+def exec_fluo_cluster_command_unverified(config, command):
+  exec_on_proxy(config, "bash {base}/install/fluo-cluster/bin/fluo-cluster {command}".format(base=config.cluster_base_dir(), command=command))
+
 def send_to_proxy(config, path, target, skipIfExists=True): 
   print "Copying to proxy: ",path
   cmd = "scp -o 'StrictHostKeyChecking no'"
@@ -364,11 +366,11 @@
 
   exec_on_proxy_verified(config, "rm -rf {base}/install; tar -C {base} -xzf {base}/tarballs/install.tar.gz".format(base=config.cluster_base_dir()))
 
-  exec_fluo_cluster_command(config, "setup")
+  exec_fluo_cluster_command_unverified(config, "setup-os")
 
   wait_until_cluster_ready(config)
  
-  exec_fluo_cluster_command(config, "init")
+  exec_fluo_cluster_command(config, "setup-sw")
       
 def main():
 
diff --git a/bin/impl/fluo_deploy/util.py b/bin/impl/fluo_deploy/util.py
index 59e566a..55441b3 100644
--- a/bin/impl/fluo_deploy/util.py
+++ b/bin/impl/fluo_deploy/util.py
@@ -65,12 +65,37 @@
   "r3.xlarge": EC2Type("hvm", 1),
 }
 
+# AMI lookup keyed by architecture and region. PVM architecture is currently not supported
+ami_lookup = {
+  "hvm": { "us-east-1": "ami-61bbf104",
+           "us-west-1": "ami-f77fbeb3",
+           "us-west-2": "ami-d440a6e7",
+           "eu-west-1": "ami-33734044",
+           "eu-central-1": "ami-e68f82fb",
+           "ap-southeast-1": "ami-2a7b6b78",
+           "ap-southeast-2": "ami-d38dc6e9",
+           "ap-northeast-1": " ami-b80b6db8",
+           "sa-east-1": "ami-fd0197e0"},
+  "pvm": { "us-east-1": None,
+           "us-west-1": None,
+           "us-west-2": None,
+           "eu-west-1": None,
+           "eu-central-1": None,
+           "ap-southeast-1": None,
+           "ap-southeast-2": None,
+           "ap-northeast-1": None,
+           "sa-east-1": None},
+}
+
 def get_arch(instance_type):
   return instance_types.get(instance_type).arch
 
 def get_num_ephemeral(instance_type):
   return instance_types.get(instance_type).ephemeral
 
+def get_ami(instance_type, region):
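+  """Look up the AMI for this instance type's architecture in the given region (None if none is listed)."""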
+  return ami_lookup.get(get_arch(instance_type)).get(region)
+
 def exit(msg):
   print msg
   sys.exit(1)
diff --git a/bin/impl/tests/test_config.py b/bin/impl/tests/test_config.py
index d3a9812..7b7a246 100644
--- a/bin/impl/tests/test_config.py
+++ b/bin/impl/tests/test_config.py
@@ -43,9 +43,9 @@
   assert c.version("fluo") == '1.0.0-beta-2-SNAPSHOT'
   assert c.version("hadoop") == '2.7.0'
   assert c.version("zookeeper") == '3.4.7'
-  assert c.hadoop_prefix() == "/home/ec2-user/install/hadoop-2.7.0"
+  assert c.hadoop_prefix() == "/home/centos/install/hadoop-2.7.0"
   assert c.data_dir() == "/media/ephemeral0"
-  assert c.cluster_tarballs_dir() == "/home/ec2-user/tarballs"
+  assert c.cluster_tarballs_dir() == "/home/centos/tarballs"
   assert c.accumulo_tarball() == "accumulo-1.6.4-bin.tar.gz"
   assert c.accumulo_path() == "fluo-deploy/cluster/tarballs/accumulo-1.6.4-bin.tar.gz"
   assert c.accumulo_url() == "http://www.gtlib.gatech.edu/pub/apache/accumulo/1.6.4/accumulo-1.6.4-bin.tar.gz"
@@ -54,15 +54,13 @@
   assert c.proxy_hostname() == "leader1"
   assert c.proxy_public_ip() == "23.0.0.0"
   assert c.proxy_private_ip() == "10.0.0.0"
-  assert c.cluster_base_dir() == "/home/ec2-user"
-  assert c.cluster_username() == "ec2-user"
+  assert c.cluster_base_dir() == "/home/centos"
+  assert c.cluster_username() == "centos"
   assert c.configure_cluster() == "true"
   assert c.get_non_proxy() == [('10.0.0.1', 'leader2'), ('10.0.0.2', 'leader3'), ('10.0.0.3', 'worker1'), ('10.0.0.4', 'worker2'), ('10.0.0.5', 'worker3')]
   assert c.get_host_services() == [('leader1', 'namenode zookeeper fluo dev'), ('leader2', 'resourcemanager zookeeper'), ('leader3', 'accumulomaster zookeeper'),
                                    ('metrics', 'metrics'), ('worker1', 'worker'), ('worker2', 'worker'), ('worker3', 'worker')]
   assert c.zookeeper_server_config() == "server.1=leader1:2888:3888\nserver.2=leader2:2888:3888\nserver.3=leader3:2888:3888"
-  assert c.get_image_id('m1.large') == 'ami-cf1066aa'
-  assert c.get_image_id('m3.large') == 'ami-e3106686'
+  assert c.get_image_id('m3.large') == 'ami-61bbf104'
   assert c.aws_access_key() == 'access_key'
   assert c.aws_secret_key() == 'secret_key'
-  assert c.detailed_monitoring() == False
diff --git a/bin/impl/tests/test_util.py b/bin/impl/tests/test_util.py
index e484a5b..0d46e46 100644
--- a/bin/impl/tests/test_util.py
+++ b/bin/impl/tests/test_util.py
@@ -12,11 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from fluo_deploy.util import get_arch, parse_args
+from fluo_deploy.util import get_arch, parse_args, get_ami
 
 def test_util():
   assert get_arch('m1.large') == 'pvm'
   assert get_arch('m3.large') == 'hvm'
+  
+  assert get_ami('m3.large', 'us-east-1') == 'ami-61bbf104'
+  assert get_ami('m1.large', 'us-east-1') == None
 
   hosts_dir = '../../conf/hosts'
   assert parse_args(hosts_dir, ['launch']) == None
diff --git a/cluster/install/fluo-cluster/bin/fluo-cluster b/cluster/install/fluo-cluster/bin/fluo-cluster
index ad3b8af..b1000d4 100755
--- a/cluster/install/fluo-cluster/bin/fluo-cluster
+++ b/cluster/install/fluo-cluster/bin/fluo-cluster
@@ -27,8 +27,8 @@
 set -e
 
 case "$1" in 
-setup)
-  "$bin"/impl/setup.sh "${@:2}"
+setup-os)
+  "$bin"/impl/setup-os.sh "${@:2}"
   ;;
 configure)
   "$bin"/impl/configure.sh "${@:2}"
@@ -36,8 +36,8 @@
 install)
   "$bin"/impl/install.sh "${@:2}"
   ;;
-init)
-  "$bin"/impl/init.sh "${@:2}"
+setup-sw)
+  "$bin"/impl/setup-sw.sh "${@:2}"
   ;;
 start)
   "$bin"/impl/start.sh "${@:2}"
@@ -57,8 +57,8 @@
 *)
   echo -e "Usage: fluo-cluster command (<argument>)\n"
   echo -e  "Commands that run on entire cluster:\n"
-  echo "  setup        Setup, initialize, and start cluster"
-  echo "  init         Initialize and start cluster"
+  echo "  setup-os     Sets up OS of each machine in cluster"
+  echo "  setup-sw     Sets up cluster software"
   echo "  start        Start all services on cluster"
   echo "  kill         Kill cluster processes"
   echo "  ready        Checks if cluster is running and ready"
diff --git a/cluster/install/fluo-cluster/bin/impl/configure.sh b/cluster/install/fluo-cluster/bin/impl/configure.sh
index 20251de..902e717 100755
--- a/cluster/install/fluo-cluster/bin/impl/configure.sh
+++ b/cluster/install/fluo-cluster/bin/impl/configure.sh
@@ -19,7 +19,7 @@
 if [ ! -f /home/$CLUSTER_USERNAME/.fluo-cluster/configured ]; then
 
   if [ "$1" == "--use-config" ]; then
-    IP_ADDR=`/sbin/ifconfig eth0 | grep "inet addr" | cut -d: -f 2 | cut -d' ' -f 1`
+    IP_ADDR=`hostname -I`
     CONF_HOSTS=$CONF_DIR/hosts/configure
     HOST=`grep -w $IP_ADDR $CONF_HOSTS | cut -d ' ' -f 2`
     NUM_EPHEMERAL=`grep -w $IP_ADDR $CONF_HOSTS | cut -d ' ' -f 3`
@@ -32,6 +32,13 @@
     NUM_EPHEMERAL=$2
   fi
 
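+  # Abort with a clear error if any required variable (from env.sh or the arguments above) is unset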
+  : ${HOST?"HOST must be set"}
+  : ${NUM_EPHEMERAL?"NUM_EPHEMERAL must be set"}
+  : ${CONF_DIR?"CONF_DIR must be set"}
+  : ${SSH_DIR?"SSH_DIR must be set"}
+  : ${CLUSTER_USERNAME?"CLUSTER_USERNAME must be set"}
+  : ${HOME_DIR?"HOME_DIR must be set"}
+
   echo "`hostname`: Configuring $HOST with $NUM_EPHEMERAL drives"
   sudo bash -c "cat $CONF_DIR/hosts/append_to_hosts >> /etc/hosts"
   cat $CONF_DIR/ssh_config >> $SSH_DIR/config
@@ -43,28 +50,21 @@
   cat $CONF_DIR/bashrc >> /home/$CLUSTER_USERNAME/.bashrc
   sudo bash -c "echo 'vm.swappiness = 0' >> /etc/sysctl.conf"
   sudo bash -c "cat $CONF_DIR/limits.conf >> /etc/security/limits.conf"
-  sudo sed -i "s/localhost.localdomain/$HOST/g" /etc/sysconfig/network
+  sudo rm /etc/security/limits.d/20-nproc.conf
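+  # Set the hostname via systemd and keep cloud-init from resetting it on reboot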
+  sudo hostnamectl set-hostname $HOST
+  sudo bash -c "echo 'preserve_hostname: true' >> /etc/cloud/cloud.cfg"
 
-  #need g++ to build accumulo native libs
+  # Need g++ to build accumulo native libs
   sudo yum install -q -y gcc-c++
 
-  # set up cloudwatch memory and disk metrics
-  sudo yum install -q -y perl-DateTime perl-Sys-Syslog perl-LWP-Protocol-https
-  wget -nc -nv -P $HOME_DIR http://aws-cloudwatch.s3.amazonaws.com/downloads/CloudWatchMonitoringScripts-1.2.1.zip
-  unzip -q $HOME_DIR/CloudWatchMonitoringScripts-1.2.1.zip
-  rm $HOME_DIR/CloudWatchMonitoringScripts-1.2.1.zip
-  CRON_SCHED="*/5 * * * *"
-  if [ $DETAILED_MONITORING == "true" ]; then
-    CRON_SCHED="* * * * *"
-  fi
-  CRON_COMMAND="$CRON_SCHED ~/aws-scripts-mon/mon-put-instance-data.pl --mem-util --disk-space-util --disk-path=/ --from-cron --aws-credential-file=$CONF_DIR/awscreds.conf"
-  bash -c "(crontab -l 2>/dev/null; echo \"$CRON_COMMAND\")| crontab -"
-
-  # settings to resolve network issues on AWS while running Spark (see FLUO-DEPLOY-83)
+  # Settings to resolve network issues on AWS while running Spark (see FLUO-DEPLOY-83)
   sudo cp $CONF_DIR/ifup-local /sbin/ifup-local
 
-  #mount ephermal devices... 
-  sudo sed -i 's/defaults,nofail,comment=cloudconfig/defaults,nofail,noatime,nodiratime,comment=cloudconfig/g' /etc/fstab
+  # Mount ephemeral devices...
+  sudo umount /mnt
+  sudo mkdir /media/ephemeral0
+  sudo sed -i 's#/mnt\tauto\tdefaults,nofail,comment=cloudconfig#/media/ephemeral0\tauto\tdefaults,nofail,noatime,nodiratime,comment=cloudconfig#g' /etc/fstab 
+  sudo mount /media/ephemeral0
   c="c"
   for i in $(seq 1 $((NUM_EPHEMERAL-1)))
   do
@@ -74,7 +74,7 @@
     sudo mount /media/ephemeral$i
   done
 
-  #make ephemeral drives writable
+  # Make ephemeral drives writable
   for i in $(seq 0 $((NUM_EPHEMERAL-1)))
   do
     sudo chown $CLUSTER_USERNAME /media/ephemeral$i
diff --git a/cluster/install/fluo-cluster/bin/impl/init.sh b/cluster/install/fluo-cluster/bin/impl/init.sh
deleted file mode 100755
index efc636e..0000000
--- a/cluster/install/fluo-cluster/bin/impl/init.sh
+++ /dev/null
@@ -1,61 +0,0 @@
-#!/usr/bin/env bash
-
-# Copyright 2014 Fluo authors (see AUTHORS)
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific 
-
-SSH_OPTS=(-tt -o 'StrictHostKeyChecking no' -A)
-
-set -e
-
-echo "Initializing cluster"
-
-echo "Confirming that nothing running on cluster"
-$BIN_DIR/fluo-cluster kill &> /dev/null
-
-echo "Removing any previous data"
-pssh -i -h $CONF_DIR/hosts/all_hosts "rm -rf /media/ephemeral*/zoo*  /media/ephemeral*/hadoop* /media/ephemeral*/yarn* /media/ephemeral*/influxdb /media/ephemeral*/grafana"
-
-echo "Installing all services on cluster"
-pssh -p 10 -x "-tt -o 'StrictHostKeyChecking no'" -t 300 -i -h $CONF_DIR/hosts/all_hosts "$BIN_DIR/fluo-cluster install --use-config"
-echo "Finished installing all services on cluster"
-
-echo "Setting up myid file on each zookeeper server"
-while read line; do
-  IFS=' ' read -ra ARR <<< "$line"
-  HOST=${ARR[0]}
-  ID=${ARR[1]}
-  echo "`hostname`: Setting zookeeper myid to $ID on $HOST"
-  ssh "${SSH_OPTS[@]}" $CLUSTER_USERNAME@$HOST "mkdir -p $DATA_DIR/zookeeper; echo $ID > $DATA_DIR/zookeeper/myid" < /dev/null
-done < $CONF_DIR/hosts/zookeeper_ids
-
-echo "Starting hadoop"
-ssh "${SSH_OPTS[@]}" $CLUSTER_USERNAME@$NAMENODE_HOST $HADOOP_PREFIX/bin/hdfs namenode -format
-$BIN_DIR/fluo-cluster start hadoop
-
-echo "Starting zookeeper"
-$BIN_DIR/fluo-cluster start zookeeper
-
-echo "Starting accumulo"
-ssh "${SSH_OPTS[@]}" $CLUSTER_USERNAME@$ACCUMULOMASTER_HOST "source $CONF_DIR/env.sh; $ACCUMULO_HOME/bin/accumulo init --clear-instance-name --instance-name $ACCUMULO_INSTANCE --password $ACCUMULO_PASSWORD"
-$BIN_DIR/fluo-cluster start accumulo
-
-echo "Starting spark history server" 
-$HADOOP_PREFIX/bin/hdfs dfs -mkdir -p /spark/history
-$BIN_DIR/fluo-cluster start spark
-
-if [[ "$SETUP_METRICS" = "true" ]]; then
-  echo "Starting metrics (InfluxDB+Grafana)"
-  $BIN_DIR/fluo-cluster start metrics
-fi
-
-echo "Cluster initialization is finished"
diff --git a/cluster/install/fluo-cluster/bin/impl/install.sh b/cluster/install/fluo-cluster/bin/impl/install.sh
index 7ad8d3b..e8bdf2b 100755
--- a/cluster/install/fluo-cluster/bin/impl/install.sh
+++ b/cluster/install/fluo-cluster/bin/impl/install.sh
@@ -11,7 +11,9 @@
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific 
+# See the License for the specific
+
+set -e
 
 RSYNC_OPTS=(-e "ssh -o 'StrictHostKeyChecking no'" --ignore-existing)
 
@@ -53,8 +55,8 @@
     sudo mkdir -p $HCE_DIR/bin
     sudo mkdir -p $HCE_DIR/etc/hadoop
     sudo cp $HADOOP_PREFIX/bin/container-executor $HCE_DIR/bin
-    sudo cp $CONF_DIR/hadoop/container-executor.cfg $HCE_DIR/etc/hadoop/
-    sudo chown -R root:ec2-user $HCE_DIR/
+    sudo cp $CONF_DIR/container-executor.cfg $HCE_DIR/etc/hadoop/
+    sudo chown -R root:$CLUSTER_USERNAME $HCE_DIR/
     sudo chmod -R 6050 $HCE_DIR/
 
     install_spark
@@ -112,6 +114,19 @@
   fi
 }
 
+function install_collectd() {
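+  # Install collectd and apply the rendered collectd.conf; if it is already installed, refresh the config and restart it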
+  if ! rpm -q --quiet collectd ; then
+    sudo yum install -q -y collectd
+    sudo cp $CONF_DIR/collectd.conf /etc/collectd.conf
+    sudo service collectd start
+    echo "`hostname`: Collectd installed"
+  else 
+    sudo service collectd stop
+    sudo cp $CONF_DIR/collectd.conf /etc/collectd.conf
+    sudo service collectd start
+  fi
+}
+
 function install_metrics(){
   if [ ! -d "$INFLUXDB_INSTALL" ]; then
     get_install $INFLUXDB_TARBALL
@@ -125,6 +140,7 @@
     get_install $GRAFANA_TARBALL
     cp $CONF_DIR/grafana.ini $GRAFANA_INSTALL/conf/custom.ini
     mkdir $GRAFANA_INSTALL/dashboards
+    cp $CONF_DIR/grafana/cluster-dashboard.json $GRAFANA_INSTALL/dashboards/
     cp $FLUO_HOME/contrib/grafana/* $GRAFANA_INSTALL/dashboards/
     echo "`hostname`: Grafana installed"
   fi
@@ -133,6 +149,14 @@
 # Exit if any command fails
 set -e
 
+rpm -q --quiet epel-release || sudo yum install -q -y epel-release
+rpm -q --quiet wget || sudo yum install -q -y wget
+
+# install collectd everywhere if metrics is configured
+if [[ "$SETUP_METRICS" = "true" ]]; then
+  install_collectd
+fi
+
 SERVICES=$@
 if [ "$SERVICES" == "--use-config" ]; then
   HOST=`hostname`
diff --git a/cluster/install/fluo-cluster/bin/impl/setup-os.sh b/cluster/install/fluo-cluster/bin/impl/setup-os.sh
new file mode 100755
index 0000000..cc23b08
--- /dev/null
+++ b/cluster/install/fluo-cluster/bin/impl/setup-os.sh
@@ -0,0 +1,55 @@
+#!/usr/bin/env bash
+
+# Copyright 2014 Fluo authors (see AUTHORS)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific 
+
+# Exit if any command fails
+set -e
+
+echo "Cluster setup started"
+
+rpm -q --quiet epel-release || sudo yum install -q -y epel-release
+rpm -q --quiet wget || sudo yum install -q -y wget
+rpm -q --quiet pssh || sudo yum install -q -y pssh
+
+echo "Creating tarballs directory on all nodes"
+pssh -x "-o 'StrictHostKeyChecking no'" -i -h $CONF_DIR/hosts/all_except_proxy "mkdir -p $TARBALLS_DIR"
+
+echo "Copying scripts to all nodes"
+pscp.pssh -h $CONF_DIR/hosts/all_except_proxy $TARBALLS_DIR/install.tar.gz $TARBALLS_DIR/install.tar.gz
+
+echo "Installing scripts on all nodes"
+pssh -i -h $CONF_DIR/hosts/all_except_proxy "rm -rf $INSTALL_DIR; tar -C $BASE_DIR -xzf $TARBALLS_DIR/install.tar.gz"
+
+echo "Confirming that nothing is running on cluster"
+$BIN_DIR/fluo-cluster kill &> /dev/null
+
+if [ "$CONFIGURE_CLUSTER" == "true" ]; then
+  echo "Configuring machines on cluster"
+  if [ ! -f /home/$CLUSTER_USERNAME/.ssh/id_rsa ]; then
+   ssh-keygen  -q -t rsa -N ''  -f /home/$CLUSTER_USERNAME/.ssh/id_rsa
+  fi
+
+  echo "Copying private key to all nodes"
+  pscp.pssh -h $CONF_DIR/hosts/all_except_proxy $SSH_DIR/id_rsa $SSH_DIR/id_rsa
+  echo "Copying public key to all nodes"
+  pscp.pssh -h $CONF_DIR/hosts/all_except_proxy $SSH_DIR/id_rsa.pub $SSH_DIR/id_rsa.pub
+
+  echo "Configuring non-proxy nodes"
+  pssh -x "-tt -o 'StrictHostKeyChecking no'" -i -h $CONF_DIR/hosts/all_except_proxy "$BIN_DIR/fluo-cluster configure --use-config" || true
+
+  echo "Configuring proxy"
+  $BIN_DIR/fluo-cluster configure --use-config
+else
+  echo "User chose not to configure ~/.ssh/config, /etc/hosts, & ~/.bashrc on cluster"
+fi
diff --git a/cluster/install/fluo-cluster/bin/impl/setup.sh b/cluster/install/fluo-cluster/bin/impl/setup-sw.sh
similarity index 66%
rename from cluster/install/fluo-cluster/bin/impl/setup.sh
rename to cluster/install/fluo-cluster/bin/impl/setup-sw.sh
index 2fab7e4..63375ac 100755
--- a/cluster/install/fluo-cluster/bin/impl/setup.sh
+++ b/cluster/install/fluo-cluster/bin/impl/setup-sw.sh
@@ -13,6 +13,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific 
 
+SSH_OPTS=(-tt -o 'StrictHostKeyChecking no' -A)
+
 function verify_checksum() {
   tarball=$1
   expected_md5=$2
@@ -24,11 +26,9 @@
   fi
 }
 
-# Exit if any command fails
 set -e
 
-echo "Cluster setup started"
-
+echo "Initializing cluster"
 echo "Downloading required software"
 wget -nc -nv -P $TARBALLS_DIR $APACHE_MIRROR/zookeeper/zookeeper-$ZOOKEEPER_VERSION/$ZOOKEEPER_TARBALL &
 wget -nc -nv -P $TARBALLS_DIR $APACHE_MIRROR/hadoop/common/hadoop-$HADOOP_VERSION/$HADOOP_TARBALL &
@@ -78,39 +78,44 @@
 fi
 echo "Checksums are valid"
 
-echo "Installing pssh"
-sudo yum install -q -y pssh
 
-echo "Creating tarballs directory on all nodes"
-pssh -x "-o 'StrictHostKeyChecking no'" -i -h $CONF_DIR/hosts/all_except_proxy "mkdir -p $TARBALLS_DIR"
-
-echo "Copying scripts to all nodes"
-pscp.pssh -h $CONF_DIR/hosts/all_except_proxy $TARBALLS_DIR/install.tar.gz $TARBALLS_DIR/install.tar.gz
-
-echo "Installing scripts on all nodes"
-pssh -i -h $CONF_DIR/hosts/all_except_proxy "rm -rf $INSTALL_DIR; tar -C $BASE_DIR -xzf $TARBALLS_DIR/install.tar.gz"
-
-echo "Confirming that nothing is running on cluster"
+echo "Confirming that nothing running on cluster"
 $BIN_DIR/fluo-cluster kill &> /dev/null
 
-if [ "$CONFIGURE_CLUSTER" == "true" ]; then
-  echo "Configuring machines on cluster"
-  if [ ! -f /home/$CLUSTER_USERNAME/.ssh/id_rsa ]; then
-   ssh-keygen  -q -t rsa -N ''  -f /home/$CLUSTER_USERNAME/.ssh/id_rsa
-  fi
+echo "Removing any previous data"
+pssh -i -h $CONF_DIR/hosts/all_hosts "rm -rf /media/ephemeral*/zoo*  /media/ephemeral*/hadoop* /media/ephemeral*/yarn* /media/ephemeral*/influxdb /media/ephemeral*/grafana"
 
-  echo "Copying private key to all nodes"
-  pscp.pssh -h $CONF_DIR/hosts/all_except_proxy $SSH_DIR/id_rsa $SSH_DIR/id_rsa
-  echo "Copying public key to all nodes"
-  pscp.pssh -h $CONF_DIR/hosts/all_except_proxy $SSH_DIR/id_rsa $SSH_DIR/id_rsa.pub
+echo "Installing all services on cluster"
+pssh -p 10 -x "-tt -o 'StrictHostKeyChecking no'" -t 300 -i -h $CONF_DIR/hosts/all_hosts "$BIN_DIR/fluo-cluster install --use-config"
+echo "Finished installing all services on cluster"
 
-  echo "Configuring non-proxy nodes"
-  pssh -x "-tt -o 'StrictHostKeyChecking no'" -i -h $CONF_DIR/hosts/all_except_proxy "$BIN_DIR/fluo-cluster configure --use-config"
+echo "Setting up myid file on each zookeeper server"
+while read line; do
+  IFS=' ' read -ra ARR <<< "$line"
+  HOST=${ARR[0]}
+  ID=${ARR[1]}
+  echo "`hostname`: Setting zookeeper myid to $ID on $HOST"
+  ssh "${SSH_OPTS[@]}" $CLUSTER_USERNAME@$HOST "mkdir -p $DATA_DIR/zookeeper; echo $ID > $DATA_DIR/zookeeper/myid" < /dev/null
+done < $CONF_DIR/hosts/zookeeper_ids
 
-  echo "Configuring proxy"
-  $BIN_DIR/fluo-cluster configure --use-config
-else
-  echo "User chose not to configure ~/.ssh/config, /etc/hosts, & ~/.bashrc on cluster"
+echo "Starting hadoop"
+ssh "${SSH_OPTS[@]}" $CLUSTER_USERNAME@$NAMENODE_HOST $HADOOP_PREFIX/bin/hdfs namenode -format
+$BIN_DIR/fluo-cluster start hadoop
+
+echo "Starting zookeeper"
+$BIN_DIR/fluo-cluster start zookeeper
+
+echo "Starting accumulo"
+ssh "${SSH_OPTS[@]}" $CLUSTER_USERNAME@$ACCUMULOMASTER_HOST "source $CONF_DIR/env.sh; $ACCUMULO_HOME/bin/accumulo init --clear-instance-name --instance-name $ACCUMULO_INSTANCE --password $ACCUMULO_PASSWORD"
+$BIN_DIR/fluo-cluster start accumulo
+
+echo "Starting spark history server" 
+$HADOOP_PREFIX/bin/hdfs dfs -mkdir -p /spark/history
+$BIN_DIR/fluo-cluster start spark
+
+if [[ "$SETUP_METRICS" = "true" ]]; then
+  echo "Starting metrics (InfluxDB+Grafana)"
+  $BIN_DIR/fluo-cluster start metrics
 fi
 
-echo "Cluster setup finished"
+echo "Cluster initialization is finished"
diff --git a/cluster/install/fluo-cluster/bin/impl/start.sh b/cluster/install/fluo-cluster/bin/impl/start.sh
index 8c5a4ae..4a58d0e 100755
--- a/cluster/install/fluo-cluster/bin/impl/start.sh
+++ b/cluster/install/fluo-cluster/bin/impl/start.sh
@@ -45,7 +45,17 @@
   retcode=1
   while [ $retcode != 0 ];  do
     URL=http://admin:admin@"$METRICS_SERVER":3000/api/datasources 
-    curl $URL -X POST -H 'Content-Type: application/json;charset=UTF-8' --data-binary `cat $CONF_DIR/grafana-datasource.json`
+    curl $URL -X POST -H 'Content-Type: application/json;charset=UTF-8' --data-binary `cat $CONF_DIR/grafana-fluo-datasource.json`
+    retcode=$?
+    if [ $retcode != 0 ]; then
+      echo "Failed to add Grafana data source.  Retrying in 5 sec.."
+      sleep 5
+    fi
+  done
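+  # Register the second data source (cluster_metrics, fed by collectd) the same way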
+  retcode=1
+  while [ $retcode != 0 ];  do
+    URL=http://admin:admin@"$METRICS_SERVER":3000/api/datasources 
+    curl $URL -X POST -H 'Content-Type: application/json;charset=UTF-8' --data-binary `cat $CONF_DIR/grafana-cluster-datasource.json`
     retcode=$?
     if [ $retcode != 0 ]; then
       echo "Failed to add Grafana data source.  Retrying in 5 sec.."
diff --git a/cluster/install/fluo-cluster/conf/.gitignore b/cluster/install/fluo-cluster/conf/.gitignore
index 71acb86..7c392f2 100644
--- a/cluster/install/fluo-cluster/conf/.gitignore
+++ b/cluster/install/fluo-cluster/conf/.gitignore
@@ -11,9 +11,11 @@
 fluo-env.sh
 keys
 apps.properties
-awscreds.conf
 spark-defaults.conf
 spark-env.sh
 influxdb.conf
 grafana.ini
-grafana-datasource.json
+grafana-fluo-datasource.json
+grafana-cluster-datasource.json
+collectd.conf
+container-executor.cfg
diff --git a/cluster/install/fluo-cluster/conf/grafana/cluster-dashboard.json b/cluster/install/fluo-cluster/conf/grafana/cluster-dashboard.json
new file mode 100644
index 0000000..98465c0
--- /dev/null
+++ b/cluster/install/fluo-cluster/conf/grafana/cluster-dashboard.json
@@ -0,0 +1,459 @@
+{
+  "id": null,
+  "title": "Cluster",
+  "originalTitle": "Cluster",
+  "tags": [],
+  "style": "dark",
+  "timezone": "browser",
+  "editable": true,
+  "hideControls": false,
+  "sharedCrosshair": false,
+  "rows": [
+    {
+      "collapse": false,
+      "editable": true,
+      "height": "250px",
+      "panels": [
+        {
+          "aliasColors": {},
+          "bars": false,
+          "datasource": "cluster_metrics",
+          "editable": true,
+          "error": false,
+          "fill": 1,
+          "grid": {
+            "leftLogBase": 1,
+            "leftMax": null,
+            "leftMin": null,
+            "rightLogBase": 1,
+            "rightMax": null,
+            "rightMin": null,
+            "threshold1": null,
+            "threshold1Color": "rgba(216, 200, 27, 0.27)",
+            "threshold2": null,
+            "threshold2Color": "rgba(234, 112, 112, 0.22)"
+          },
+          "id": 1,
+          "interval": ">10s",
+          "legend": {
+            "avg": false,
+            "current": false,
+            "max": false,
+            "min": false,
+            "show": true,
+            "total": false,
+            "values": false
+          },
+          "lines": true,
+          "linewidth": 2,
+          "links": [],
+          "nullPointMode": "null",
+          "percentage": false,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "seriesOverrides": [],
+          "span": 6,
+          "stack": false,
+          "steppedLine": false,
+          "targets": [
+            {
+              "alias": "$tag_host",
+              "fields": [
+                {
+                  "func": "mean",
+                  "name": "value"
+                }
+              ],
+              "groupBy": [
+                {
+                  "interval": "auto",
+                  "type": "time"
+                },
+                {
+                  "key": "host",
+                  "type": "tag"
+                }
+              ],
+              "measurement": "cpu_value",
+              "query": "SELECT mean(\"value\") AS \"value\" FROM \"cpu_value\" WHERE \"type\" = 'percent' AND \"type_instance\" = 'active' AND $timeFilter GROUP BY time($interval), \"host\"",
+              "refId": "A",
+              "tags": [
+                {
+                  "key": "type",
+                  "operator": "=",
+                  "value": "percent"
+                },
+                {
+                  "condition": "AND",
+                  "key": "type_instance",
+                  "operator": "=",
+                  "value": "active"
+                }
+              ]
+            }
+          ],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "CPU usage (as a percentage) by host",
+          "tooltip": {
+            "shared": true,
+            "value_type": "cumulative"
+          },
+          "type": "graph",
+          "x-axis": true,
+          "y-axis": true,
+          "y_formats": [
+            "short",
+            "short"
+          ]
+        },
+        {
+          "aliasColors": {},
+          "bars": false,
+          "datasource": "cluster_metrics",
+          "editable": true,
+          "error": false,
+          "fill": 1,
+          "grid": {
+            "leftLogBase": 1,
+            "leftMax": null,
+            "leftMin": null,
+            "rightLogBase": 1,
+            "rightMax": null,
+            "rightMin": null,
+            "threshold1": null,
+            "threshold1Color": "rgba(216, 200, 27, 0.27)",
+            "threshold2": null,
+            "threshold2Color": "rgba(234, 112, 112, 0.22)"
+          },
+          "id": 2,
+          "interval": "10s",
+          "legend": {
+            "avg": false,
+            "current": false,
+            "max": false,
+            "min": false,
+            "show": true,
+            "total": false,
+            "values": false
+          },
+          "lines": true,
+          "linewidth": 2,
+          "links": [],
+          "nullPointMode": "null",
+          "percentage": false,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "seriesOverrides": [],
+          "span": 6,
+          "stack": false,
+          "steppedLine": false,
+          "targets": [
+            {
+              "alias": "$tag_host",
+              "fields": [
+                {
+                  "func": "sum",
+                  "name": "value"
+                }
+              ],
+              "groupBy": [
+                {
+                  "interval": "auto",
+                  "type": "time"
+                },
+                {
+                  "key": "host",
+                  "type": "tag"
+                }
+              ],
+              "measurement": "memory_value",
+              "query": "SELECT sum(\"value\") AS \"value\" FROM \"memory_value\" WHERE \"type\" = 'memory' AND \"type_instance\" = 'free' OR \"type_instance\" = 'cached' AND $timeFilter GROUP BY time($interval), \"host\"",
+              "refId": "A",
+              "tags": [
+                {
+                  "key": "type",
+                  "operator": "=",
+                  "value": "memory"
+                },
+                {
+                  "condition": "AND",
+                  "key": "type_instance",
+                  "operator": "=",
+                  "value": "free"
+                },
+                {
+                  "condition": "OR",
+                  "key": "type_instance",
+                  "operator": "=",
+                  "value": "cached"
+                }
+              ]
+            }
+          ],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "Available Memory (free+cached) by host",
+          "tooltip": {
+            "shared": true,
+            "value_type": "cumulative"
+          },
+          "type": "graph",
+          "x-axis": true,
+          "y-axis": true,
+          "y_formats": [
+            "bytes",
+            "short"
+          ]
+        }
+      ],
+      "title": "Row"
+    },
+    {
+      "collapse": false,
+      "editable": true,
+      "height": "250px",
+      "panels": [
+        {
+          "aliasColors": {},
+          "bars": false,
+          "datasource": null,
+          "editable": true,
+          "error": false,
+          "fill": 1,
+          "grid": {
+            "leftLogBase": 1,
+            "leftMax": null,
+            "leftMin": null,
+            "rightLogBase": 1,
+            "rightMax": null,
+            "rightMin": null,
+            "threshold1": null,
+            "threshold1Color": "rgba(216, 200, 27, 0.27)",
+            "threshold2": null,
+            "threshold2Color": "rgba(234, 112, 112, 0.22)"
+          },
+          "id": 3,
+          "interval": ">10s",
+          "legend": {
+            "avg": false,
+            "current": false,
+            "max": false,
+            "min": false,
+            "show": true,
+            "total": false,
+            "values": false
+          },
+          "lines": true,
+          "linewidth": 2,
+          "links": [],
+          "nullPointMode": "null",
+          "percentage": false,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "seriesOverrides": [],
+          "span": 6,
+          "stack": false,
+          "steppedLine": false,
+          "targets": [
+            {
+              "alias": "$tag_host",
+              "fields": [
+                {
+                  "func": "mean",
+                  "name": "value"
+                }
+              ],
+              "groupBy": [
+                {
+                  "interval": "auto",
+                  "type": "time"
+                },
+                {
+                  "key": "host",
+                  "type": "tag"
+                }
+              ],
+              "measurement": "load_shortterm",
+              "query": "SELECT mean(\"value\") AS \"value\" FROM \"load_shortterm\" WHERE \"type\" = 'load' AND $timeFilter GROUP BY time($interval), \"host\"",
+              "refId": "A",
+              "tags": [
+                {
+                  "key": "type",
+                  "operator": "=",
+                  "value": "load"
+                }
+              ]
+            }
+          ],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "Load (short-term)  by host",
+          "tooltip": {
+            "shared": true,
+            "value_type": "cumulative"
+          },
+          "type": "graph",
+          "x-axis": true,
+          "y-axis": true,
+          "y_formats": [
+            "short",
+            "short"
+          ]
+        },
+        {
+          "title": "Available disk (in GB) by host",
+          "error": false,
+          "span": 6,
+          "editable": true,
+          "type": "graph",
+          "id": 4,
+          "datasource": null,
+          "renderer": "flot",
+          "x-axis": true,
+          "y-axis": true,
+          "y_formats": [
+            "bytes",
+            "short"
+          ],
+          "grid": {
+            "leftLogBase": 1,
+            "leftMax": null,
+            "rightMax": null,
+            "leftMin": null,
+            "rightMin": null,
+            "rightLogBase": 1,
+            "threshold1": null,
+            "threshold2": null,
+            "threshold1Color": "rgba(216, 200, 27, 0.27)",
+            "threshold2Color": "rgba(234, 112, 112, 0.22)"
+          },
+          "lines": true,
+          "fill": 1,
+          "linewidth": 2,
+          "points": false,
+          "pointradius": 5,
+          "bars": false,
+          "stack": false,
+          "percentage": false,
+          "legend": {
+            "show": true,
+            "values": false,
+            "min": false,
+            "max": false,
+            "current": false,
+            "total": false,
+            "avg": false
+          },
+          "nullPointMode": "null",
+          "steppedLine": false,
+          "tooltip": {
+            "value_type": "cumulative",
+            "shared": true
+          },
+          "timeFrom": null,
+          "timeShift": null,
+          "targets": [
+            {
+              "refId": "A",
+              "tags": [
+                {
+                  "key": "instance",
+                  "operator": "=",
+                  "value": "media-ephemeral0"
+                },
+                {
+                  "condition": "OR",
+                  "key": "instance",
+                  "operator": "=",
+                  "value": "media-ephemeral0"
+                },
+                {
+                  "condition": "AND",
+                  "key": "type_instance",
+                  "operator": "=",
+                  "value": "free"
+                }
+              ],
+              "groupBy": [
+                {
+                  "type": "time",
+                  "interval": "auto"
+                },
+                {
+                  "type": "tag",
+                  "key": "host"
+                }
+              ],
+              "fields": [
+                {
+                  "name": "value",
+                  "func": "sum"
+                }
+              ],
+              "measurement": "df_value",
+              "query": "SELECT sum(\"value\") AS \"value\" FROM \"df_value\" WHERE \"instance\" = 'media-ephemeral0' OR \"instance\" = 'media-ephemeral0' AND \"type_instance\" = 'free' AND $timeFilter GROUP BY time($interval), \"host\"",
+              "alias": "$tag_host",
+              "rawQuery": false
+            }
+          ],
+          "aliasColors": {},
+          "seriesOverrides": [],
+          "links": [],
+          "interval": "10s"
+        }
+      ],
+      "title": "New row"
+    },
+    {
+      "title": "New row",
+      "height": "250px",
+      "editable": true,
+      "collapse": false,
+      "panels": []
+    }
+  ],
+  "time": {
+    "from": "now-5m",
+    "to": "now"
+  },
+  "timepicker": {
+    "now": true,
+    "refresh_intervals": [
+      "5s",
+      "10s",
+      "30s",
+      "1m",
+      "5m",
+      "15m",
+      "30m",
+      "1h",
+      "2h",
+      "1d"
+    ],
+    "time_options": [
+      "5m",
+      "15m",
+      "1h",
+      "6h",
+      "12h",
+      "24h",
+      "2d",
+      "7d",
+      "30d"
+    ]
+  },
+  "templating": {
+    "list": []
+  },
+  "annotations": {
+    "list": []
+  },
+  "refresh": "10s",
+  "schemaVersion": 7,
+  "version": 0,
+  "links": []
+}
diff --git a/cluster/install/fluo-cluster/conf/hadoop/container-executor.cfg b/cluster/install/fluo-cluster/conf/hadoop/container-executor.cfg
deleted file mode 100644
index 3277686..0000000
--- a/cluster/install/fluo-cluster/conf/hadoop/container-executor.cfg
+++ /dev/null
@@ -1,3 +0,0 @@
-yarn.nodemanager.linux-container-executor.group=ec2-user
-min.user.id=0
-allowed.system.users=ec2-user
diff --git a/cluster/templates/fluo-cluster/conf/awscreds.conf b/cluster/templates/fluo-cluster/conf/awscreds.conf
deleted file mode 100644
index eb51c29..0000000
--- a/cluster/templates/fluo-cluster/conf/awscreds.conf
+++ /dev/null
@@ -1,2 +0,0 @@
-AWSAccessKeyId=$aws.access.key
-AWSSecretKey=$aws.secret.key
diff --git a/cluster/templates/fluo-cluster/conf/collectd.conf b/cluster/templates/fluo-cluster/conf/collectd.conf
new file mode 100644
index 0000000..00f541c
--- /dev/null
+++ b/cluster/templates/fluo-cluster/conf/collectd.conf
@@ -0,0 +1,34 @@
+#Hostname    "localhost"
+#FQDNLookup   true
+#BaseDir     "/var/lib/collectd"
+#PIDFile     "/var/run/collectd.pid"
+#PluginDir   "/usr/lib64/collectd"
+#TypesDB     "/usr/share/collectd/types.db"
+
+Interval     10
+
+#Timeout      2
+#ReadThreads  5
+#WriteThreads 5
+
+LoadPlugin syslog
+LoadPlugin cpu
+LoadPlugin df
+LoadPlugin load
+LoadPlugin memory
+LoadPlugin network
+
+<Plugin cpu>
+  ReportByState false
+  ReportByCpu false
+</Plugin>
+
+<Plugin df>
+  MountPoint "/^/media/ephemeral/"
+</Plugin>
+
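+# Send metrics to the InfluxDB collectd listener on the metrics node (see the [collectd] section in influxdb.conf)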
+<Plugin network>
+  Server "$METRICS_SERVER" "8096"
+</Plugin>
+
+Include "/etc/collectd.d"
diff --git a/cluster/templates/fluo-cluster/conf/container-executor.cfg b/cluster/templates/fluo-cluster/conf/container-executor.cfg
new file mode 100644
index 0000000..31c3d72
--- /dev/null
+++ b/cluster/templates/fluo-cluster/conf/container-executor.cfg
@@ -0,0 +1,3 @@
+yarn.nodemanager.linux-container-executor.group=$cluster.username
+min.user.id=0
+allowed.system.users=$cluster.username
diff --git a/cluster/templates/fluo-cluster/conf/env.sh b/cluster/templates/fluo-cluster/conf/env.sh
index 5167a30..0bf7d09 100755
--- a/cluster/templates/fluo-cluster/conf/env.sh
+++ b/cluster/templates/fluo-cluster/conf/env.sh
@@ -5,7 +5,6 @@
 export DATA_DIR=$DATA_DIR
 export CLUSTER_USERNAME=$cluster.username
 export CONFIGURE_CLUSTER=$configure.cluster
-export DETAILED_MONITORING=$detailed.monitoring
 export APACHE_MIRROR=$apache.mirror
 export ACCUMULO_VERSION=$accumulo.version
 export ACCUMULO_MD5=$accumulo.md5.hash
diff --git a/cluster/templates/fluo-cluster/conf/grafana-cluster-datasource.json b/cluster/templates/fluo-cluster/conf/grafana-cluster-datasource.json
new file mode 100644
index 0000000..8543049
--- /dev/null
+++ b/cluster/templates/fluo-cluster/conf/grafana-cluster-datasource.json
@@ -0,0 +1 @@
+{"name":"cluster_metrics","type":"influxdb","url":"http://$METRICS_SERVER:8086","access":"direct","isDefault":true,"database":"cluster_metrics","user":"fluo","password":"secret"}
diff --git a/cluster/templates/fluo-cluster/conf/grafana-datasource.json b/cluster/templates/fluo-cluster/conf/grafana-datasource.json
deleted file mode 100644
index d916f87..0000000
--- a/cluster/templates/fluo-cluster/conf/grafana-datasource.json
+++ /dev/null
@@ -1 +0,0 @@
-{"name":"influxdb","type":"influxdb","url":"http://$METRICS_SERVER:8086","access":"direct","isDefault":true,"database":"fluo_metrics","user":"fluo","password":"secret"}
diff --git a/cluster/templates/fluo-cluster/conf/grafana-fluo-datasource.json b/cluster/templates/fluo-cluster/conf/grafana-fluo-datasource.json
new file mode 100644
index 0000000..870d79b
--- /dev/null
+++ b/cluster/templates/fluo-cluster/conf/grafana-fluo-datasource.json
@@ -0,0 +1 @@
+{"name":"fluo_metrics","type":"influxdb","url":"http://$METRICS_SERVER:8086","access":"direct","isDefault":true,"database":"fluo_metrics","user":"fluo","password":"secret"}
diff --git a/cluster/templates/fluo-cluster/conf/influxdb.conf b/cluster/templates/fluo-cluster/conf/influxdb.conf
index 2b23f11..40d7d98 100644
--- a/cluster/templates/fluo-cluster/conf/influxdb.conf
+++ b/cluster/templates/fluo-cluster/conf/influxdb.conf
@@ -14,6 +14,15 @@
 [admin]
   enabled = true
 
+[collectd]
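+  # Accept metrics sent by the collectd network plugin running on every node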
+  enabled = true
+  bind-address = ":8096"
+  database = "cluster_metrics"
+  typesdb = "/usr/share/collectd/types.db"
+  batch-size = 1000   
+  batch-pending = 5
+  batch-timeout = "1s"
+
 [[graphite]]
   bind-address = ":2003"
   enabled = true
diff --git a/cluster/templates/fluo-cluster/conf/yarn-site.xml b/cluster/templates/fluo-cluster/conf/yarn-site.xml
index 3ea5b2f..06a2f9d 100644
--- a/cluster/templates/fluo-cluster/conf/yarn-site.xml
+++ b/cluster/templates/fluo-cluster/conf/yarn-site.xml
@@ -79,10 +79,10 @@
   </property>
   <property>
     <name>yarn.nodemanager.container-executor.group</name>
-    <value>ec2-user</value>
+    <value>$cluster.username</value>
   </property>
   <property>
     <name>yarn.nodemanager.linux-container-executor.nonsecure-mode.local-user</name>
-    <value>ec2-user</value>
+    <value>$cluster.username</value>
   </property>
 </configuration>
diff --git a/conf/fluo-deploy.props.example b/conf/fluo-deploy.props.example
index 13ea32c..31a8681 100644
--- a/conf/fluo-deploy.props.example
+++ b/conf/fluo-deploy.props.example
@@ -15,10 +15,10 @@
 [general]
 # Cluster user name (install command will SSH to cluster using this user)
 # Leave default below if launching cluster in AWS
-cluster.username = ec2-user
+cluster.username = centos
 # Cluster base directory where install/ & tarballs/ directories are created
 # Leave default below if launching cluster in AWS
-cluster.base.dir = /home/ec2-user
+cluster.base.dir = /home/centos
 # Hostname of proxy node that fluo-deploy will use to direct installation of cluster.  Will be given 
 # public IP if launching in EC2.  If not launching in EC2, node must have public IP that can be reached
 # from your machine. Hostname can be chosen from "nodes" section below.
@@ -76,17 +76,6 @@
 #reason setting this to non zero causes some instance types to fail, so use with
 #caution.  See issue #59
 ebs.root.size = 0
-# Enables detailed monitoring on EC2 instance which costs extra in AWS
-detailed.monitoring = false
-# Set AMI image given virtualization type (which is determined by your instance type)
-# Most instance types use HVM rather than PVM virtualization. AMI IDs differ by EC2 region. 
-# Below are settings for Amazon Linux AMI for different regions.  N. Virginia is set by default.
-# N. Virginia
-hvm.ami = ami-e3106686
-pvm.ami = ami-cf1066aa
-# Oregon
-#hvm.ami = ami-9ff7e8af
-#pvm.ami = ami-81f7e8b1
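+# The AMI is now chosen automatically based on the instance type's architecture and the configured EC2 region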
 
 [performance]
 #Automatically tune Accumulo, Yarn, and Fluo performance setting by selecting or