Several updates to Muchos (#233)
* Using workers file with Hadoop 3
* Updated deprecated Ansible code
* Upgraded software versions
* Removed network settings for Spark
diff --git a/ansible/common.yml b/ansible/common.yml
index b9214f4..8a17ee8 100644
--- a/ansible/common.yml
+++ b/ansible/common.yml
@@ -13,6 +13,3 @@
- import_tasks: roles/common/tasks/ssh.yml
- import_tasks: roles/common/tasks/os.yml
- import_tasks: roles/common/tasks/drives.yml
- handlers:
- - name: "update network settings"
- command: /sbin/ifup-local {{ network_interface }}
diff --git a/ansible/roles/accumulo/templates/accumulo.properties b/ansible/roles/accumulo/templates/accumulo.properties
index 44edeca..2379c18 100644
--- a/ansible/roles/accumulo/templates/accumulo.properties
+++ b/ansible/roles/accumulo/templates/accumulo.properties
@@ -20,7 +20,7 @@
## Time to wait on I/O for simple, short RPC calls
general.rpc.timeout=240s
-## A secret unique to a given instnace that servers must know to communicate
+## A secret unique to a given instance that servers must know to communicate
instance.secret=muchos
## Sets location in HDFS where Accumulo will store data
diff --git a/ansible/roles/common/files/ifup-local b/ansible/roles/common/files/ifup-local
deleted file mode 100755
index 29102ab..0000000
--- a/ansible/roles/common/files/ifup-local
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/bin/bash
-/sbin/ethtool -K $1 sg off
-/sbin/ethtool -K $1 tso off
diff --git a/ansible/roles/common/tasks/main.yml b/ansible/roles/common/tasks/main.yml
index 3b2d094..5c5216e 100644
--- a/ansible/roles/common/tasks/main.yml
+++ b/ansible/roles/common/tasks/main.yml
@@ -1,16 +1,17 @@
- name: "enable epel yum repo"
yum: name=epel-release state=present
- name: "install packages"
- yum: name={{ item }} state=present
- with_items:
- - vim
- - git
- - wget
- - gcc-c++
- - collectd
- - screen
- - patch
- - java-1.8.0-openjdk-devel
+ yum:
+ name:
+ - vim
+ - git
+ - wget
+ - gcc-c++
+ - collectd
+ - screen
+ - patch
+ - java-1.8.0-openjdk-devel
+ state: present
- name: "configure node shutdown"
shell: shutdown +{{ shutdown_delay_minutes }} &> {{ cluster_basedir }}/.shutdown creates={{ cluster_basedir}}/.shutdown
when: shutdown_delay_minutes > 0
diff --git a/ansible/roles/common/tasks/os.yml b/ansible/roles/common/tasks/os.yml
index 2d27ba8..ff98ea4 100644
--- a/ansible/roles/common/tasks/os.yml
+++ b/ansible/roles/common/tasks/os.yml
@@ -4,9 +4,6 @@
file: path=/etc/security/limits.d/20-nproc.conf state=absent
- name: "copy new limits.conf"
copy: src=roles/common/files/limits.conf dest=/etc/security/limits.conf
-- name: "configure network settings for spark"
- copy: src=roles/common/files/ifup-local dest=/sbin/ifup-local mode=0755
- notify: "update network settings"
- name: "configure user shell"
template: src=roles/common/templates/{{ item }} dest=/home/{{ cluster_user }}/.{{ item }} owner={{ cluster_user }} group={{ cluster_user }} mode=0644
with_items:
diff --git a/ansible/roles/grafana/tasks/main.yml b/ansible/roles/grafana/tasks/main.yml
index 4af4114..e6da759 100644
--- a/ansible/roles/grafana/tasks/main.yml
+++ b/ansible/roles/grafana/tasks/main.yml
@@ -19,7 +19,7 @@
- fluo-recent-dashboard.json
- fluo-historical-dashboard.json
- name: "configure accumulo dashboards"
- get_url: url=https://raw.githubusercontent.com/apache/fluo-uno/master/conf/grafana/accumulo-dashboard.json dest=/etc/grafana/dashboards/accumulo-dashboard.json
+ get_url: url=https://raw.githubusercontent.com/apache/fluo-uno/master/plugins/influx-metrics/accumulo-dashboard.json dest=/etc/grafana/dashboards/accumulo-dashboard.json
- name: "ensure owner and group are correct for /etc/grafana"
file: path=/etc/grafana/ recurse=yes owner=grafana group=grafana
- name: "ensure grafana is running (and enable it at boot)"
diff --git a/ansible/roles/hadoop/tasks/main.yml b/ansible/roles/hadoop/tasks/main.yml
index fdb63e5..b37dee8 100644
--- a/ansible/roles/hadoop/tasks/main.yml
+++ b/ansible/roles/hadoop/tasks/main.yml
@@ -7,7 +7,16 @@
- hdfs-site.xml
- yarn-site.xml
- mapred-site.xml
+- name: "configure hadoop 2"
+ template: src={{ item }} dest={{ hadoop_home }}/etc/hadoop/{{ item }}
+ with_items:
- slaves
+ when: hadoop_major_version == '2'
+- name: "configure hadoop 3"
+ template: src={{ item }} dest={{ hadoop_home }}/etc/hadoop/{{ item }}
+ with_items:
+ - workers
+ when: hadoop_major_version == '3'
- name: "copy spark yarn shuffle jar to hadoop lib"
command: cp {{ spark_home }}/yarn/spark-{{ spark_version }}-yarn-shuffle.jar {{ hadoop_home }}/share/hadoop/yarn/lib/ creates={{ hadoop_home }}/share/hadoop/yarn/lib/spark-{{ spark_version }}-yarn-shuffle.jar
when: "'spark' in groups"
diff --git a/ansible/roles/hadoop/templates/workers b/ansible/roles/hadoop/templates/workers
new file mode 100644
index 0000000..cd3348c
--- /dev/null
+++ b/ansible/roles/hadoop/templates/workers
@@ -0,0 +1,3 @@
+{% for host in groups['workers'] %}
+{{ host }}
+{% endfor %}
diff --git a/conf/checksums b/conf/checksums
index af0c4aa..cdc605f 100644
--- a/conf/checksums
+++ b/conf/checksums
@@ -1,3 +1,4 @@
+accumulo:2.0.0-alpha-1:baa5e0929248ff0d96355bc7fb42a5b75d183a83364519296e07b0adbb089180
accumulo:1.9.2:c23c147e6abde5e6b851cf27f91b813705dc41d07c2bfea798a86abb144255d5
accumulo:1.9.0:f68a6145029a9ea843b0305c90a7f5f0334d8a8ceeea94734267ec36421fe7fe
accumulo:1.8.1:eba3bfe823935ca7901ea7c2bd59c84a68b9381361699c7e260bbd9191f237f4
@@ -13,6 +14,7 @@
hadoop:2.7.6:f2327ea93f4bc5a5d7150dee8e0ede196d3a77ff8526a7dd05a48a09aae25669
hadoop:2.7.5:0bfc4d9b04be919be2fdf36f67fa3b4526cdbd406c512a7a1f5f1b715661f831
hadoop:2.6.5:001ad18d4b6d0fe542b15ddadba2d092bc97df1c4d2d797381c8d12887691898
+spark:2.3.2:3387107155d62f04ccf6bcaf2e00a69a0de5ae5df875348d93147743c206f0a8
spark:2.2.2:023b2fea378b3dd0fee2d5d1de6bfaf2d8349aefe7be97a9cbcf03bbacc428d7
zookeeper:3.4.13:7ced798e41d2027784b8fd55c908605ad5bd94a742d5dab2506be8f94770594d
zookeeper:3.4.12:c686f9319050565b58e642149cb9e4c9cc8c7207aacc2cb70c5c0672849594b9
diff --git a/conf/muchos.props.example b/conf/muchos.props.example
index a98057b..c797155 100644
--- a/conf/muchos.props.example
+++ b/conf/muchos.props.example
@@ -25,8 +25,6 @@
# public IP if launching in EC2. If not launching in EC2, node must have public IP that can be reached
# from your machine. Hostname can be chosen from "nodes" section below.
proxy_hostname = leader1
-# Cluster network interface (leave default if launching in AWS)
-network_interface=eth0
# If set, a SOCKS proxy will be created on the specified port when connecting to proxy using 'muchos ssh <cluster>'
#proxy_socks_port = 38585
# Accumulo Instance name
@@ -34,12 +32,12 @@
# Accumluo Password
accumulo_password = secret
# Software versions (set sha-256 in conf/checksums)
-hadoop_version = 2.8.4
-zookeeper_version = 3.4.12
-spark_version = 2.2.2
+hadoop_version = 3.1.1
+zookeeper_version = 3.4.13
+spark_version = 2.3.2
fluo_version = 1.2.0
fluo_yarn_version = 1.0.0
-accumulo_version = 1.9.2
+accumulo_version = 2.0.0-alpha-1
[ec2]
# AWS machine image to use. The default below is for a CentOS 7 image (in us-east-1).
diff --git a/lib/muchos/config.py b/lib/muchos/config.py
index 2aa8996..5f8a399 100644
--- a/lib/muchos/config.py
+++ b/lib/muchos/config.py
@@ -346,7 +346,6 @@
'metrics_drive_ids': None,
'mount_root': None,
'node_type_map': None,
- 'network_interface': None,
'spark_sha256': None,
'shutdown_delay_minutes': None,
'twill_reserve_mem_mb': None,
diff --git a/lib/muchos/main.py b/lib/muchos/main.py
index 0d8d0f4..a7824f5 100644
--- a/lib/muchos/main.py
+++ b/lib/muchos/main.py
@@ -200,7 +200,7 @@
if name in play_vars:
play_vars[name] = value
- play_vars['accumulos_sha256'] = config.checksum('accumulo')
+ play_vars['accumulo_sha256'] = config.checksum('accumulo')
play_vars['fluo_sha256'] = config.checksum('fluo')
play_vars['fluo_yarn_sha256'] = config.checksum('fluo_yarn')
play_vars['hadoop_sha256'] = config.checksum('hadoop')