Unify Hadoop HA and non-HA into one Ansible role (#311)
Removes code duplication and lends itself to easier maintenance.
diff --git a/ansible/hadoop-ha.yml b/ansible/hadoop-ha.yml
deleted file mode 100644
index 8fbd8d4..0000000
--- a/ansible/hadoop-ha.yml
+++ /dev/null
@@ -1,44 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-- hosts: all:!{{ azure_proxy_host }}
- roles:
- - hadoop-ha
-- hosts: journalnode
- tasks:
- - import_tasks: roles/hadoop-ha/tasks/start-journal.yml
-- hosts: namenode[0]
- tasks:
- - import_tasks: roles/hadoop-ha/tasks/format-nn.yml
-- hosts: namenode[0]
- tasks:
- - import_tasks: roles/hadoop-ha/tasks/format-zk.yml
-- hosts: namenode
- tasks:
- - import_tasks: roles/hadoop-ha/tasks/start-zkfc.yml
-- hosts: namenode[0]
- tasks:
- - import_tasks: roles/hadoop-ha/tasks/start-nn1.yml
-- hosts: namenode[1]
- tasks:
- - import_tasks: roles/hadoop-ha/tasks/start-nn2.yml
-- hosts: workers
- tasks:
- - import_tasks: roles/hadoop-ha/tasks/start-dn.yml
-- hosts: resourcemanager
- tasks:
- - import_tasks: roles/hadoop-ha/tasks/start-yarn.yml
diff --git a/ansible/hadoop.yml b/ansible/hadoop.yml
index 30fd7f7..3c843b8 100644
--- a/ansible/hadoop.yml
+++ b/ansible/hadoop.yml
@@ -18,6 +18,28 @@
- hosts: all:!{{ azure_proxy_host }}
roles:
- hadoop
+- hosts: journalnode
+ tasks:
+ - import_tasks: roles/hadoop/tasks/start-journal.yml
+ when: hdfs_ha == True
+- hosts: namenode[0]
+ tasks:
+ - import_tasks: roles/hadoop/tasks/format-nn.yml
+- hosts: namenode[0]
+ tasks:
+ - import_tasks: roles/hadoop/tasks/format-zk.yml
+ when: hdfs_ha == True
+- hosts: namenode
+ tasks:
+ - import_tasks: roles/hadoop/tasks/start-zkfc.yml
+ when: hdfs_ha == True
+- hosts: namenode[0]
+ tasks:
+ - import_tasks: roles/hadoop/tasks/start-nn1.yml
+- hosts: namenode:!namenode[0]
+ tasks:
+ - import_tasks: roles/hadoop/tasks/start-nn2.yml
+ when: hdfs_ha == True
- hosts: namenode
tasks:
- import_tasks: roles/hadoop/tasks/start-hdfs.yml
diff --git a/ansible/roles/azure/tasks/create_vmss.yml b/ansible/roles/azure/tasks/create_vmss.yml
index d7f5d08..c47fcb5 100644
--- a/ansible/roles/azure/tasks/create_vmss.yml
+++ b/ansible/roles/azure/tasks/create_vmss.yml
@@ -216,49 +216,49 @@
path: "{{ deploy_path }}/conf/muchos.props"
line: "{{ item }} = namenode,resourcemanager,accumulomaster,zookeeper"
with_items: "{{ instances_dict | json_query('[0].value') }}"
- when: not hdfs_ha|bool
+ when: not hdfs_ha
- name: Assign metrics to the second node of the cluster
lineinfile:
path: "{{ deploy_path }}/conf/muchos.props"
line: "{{ item }} = metrics"
with_items: "{{ instances_dict | json_query('[1].value') }}"
- when: not hdfs_ha|bool
+ when: not hdfs_ha
- name: Add worker nodes to muchos.props
lineinfile:
path: "{{ deploy_path }}/conf/muchos.props"
line: "{{ item }} = worker"
with_items: "{{ instances_dict | json_query('[2:].value') }}"
- when: not hdfs_ha|bool
+ when: not hdfs_ha
- name: Assign Accumulo master, HDFS HA components cluster roles to the first node of the cluster
lineinfile:
path: "{{ deploy_path }}/conf/muchos.props"
line: "{{ item }} = namenode,resourcemanager,accumulomaster,zookeeper,journalnode,zkfc"
with_items: "{{ instances_dict | json_query('[0].value') }}"
- when: hdfs_ha|bool
+ when: hdfs_ha
- name: Assign Accumulo master, HDFS HA components cluster roles to the second node of the cluster
lineinfile:
path: "{{ deploy_path }}/conf/muchos.props"
line: "{{ item }} = zookeeper,metrics,journalnode,namenode,zkfc,accumulomaster"
with_items: "{{ instances_dict | json_query('[1].value') }}"
- when: hdfs_ha|bool
+ when: hdfs_ha
- name: Assign HDFS HA components cluster roles to the third node of the cluster
lineinfile:
path: "{{ deploy_path }}/conf/muchos.props"
line: "{{ item }} = journalnode,zookeeper"
with_items: "{{ instances_dict | json_query('[2].value') }}"
- when: hdfs_ha|bool
+ when: hdfs_ha
- name: Add worker nodes to muchos.props
lineinfile:
path: "{{ deploy_path }}/conf/muchos.props"
line: "{{ item }} = worker"
with_items: "{{ instances_dict | json_query('[3:].value') }}"
- when: hdfs_ha|bool
+ when: hdfs_ha
- name: Change proxy hostname to azure proxy host in muchos.props
lineinfile:
diff --git a/ansible/roles/hadoop-ha/tasks/main.yml b/ansible/roles/hadoop-ha/tasks/main.yml
deleted file mode 100644
index dd92ae1..0000000
--- a/ansible/roles/hadoop-ha/tasks/main.yml
+++ /dev/null
@@ -1,64 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-- name: "install hadoop tarball"
- unarchive: src={{ tarballs_dir }}/{{ hadoop_tarball }} dest={{ install_dir }} creates={{ hadoop_home }} copy=yes
-- name: "configure hadoop with templates"
- template: src={{ item }} dest={{ hadoop_home }}/etc/hadoop/{{ item }}
- with_items:
- - core-site.xml
- - hdfs-site.xml
- - yarn-site.xml
- - mapred-site.xml
- - hadoop-metrics2.properties
-- name: "configure hadoop 2"
- template: src={{ item }} dest={{ hadoop_home }}/etc/hadoop/{{ item }}
- with_items:
- - slaves
- when: hadoop_major_version == '2'
-- name: "configure hadoop 3"
- template: src={{ item }} dest={{ hadoop_home }}/etc/hadoop/{{ item }}
- with_items:
- - workers
- when: hadoop_major_version == '3'
-
-# This is currently needed to run hadoop with Java 11 (see https://github.com/apache/fluo-muchos/issues/266)
-- name: "Copy javax.activation-api (when Hadoop 3 and Java 11 are used)"
- synchronize: src={{ user_home }}/mvn_dep/ dest={{ hadoop_home }}/share/hadoop/common/lib/
- when: hadoop_major_version == '3' and java_product_version == 11
-
-- name: "copy spark yarn shuffle jar to hadoop lib"
- command: cp {{ spark_home }}/yarn/spark-{{ spark_version }}-yarn-shuffle.jar {{ hadoop_home }}/share/hadoop/yarn/lib/ creates={{ hadoop_home }}/share/hadoop/yarn/lib/spark-{{ spark_version }}-yarn-shuffle.jar
- when: "'spark' in groups"
-- name: "setup hadoop short circuit socket dir"
- file: path=/var/lib/hadoop-hdfs state=directory owner={{ cluster_user }} group={{ cluster_group }} mode=0755
- become: yes
-- name: "Configure hadoop log dir"
- replace:
- path: "{{ hadoop_home }}/etc/hadoop/hadoop-env.sh"
- regexp: '.*export\s+HADOOP_LOG_DIR.*'
- replace: "export HADOOP_LOG_DIR={{ worker_data_dirs[0] }}/logs/hadoop"
-- name: "Create hadoop log dir"
- file: path={{ worker_data_dirs[0] }}/logs/hadoop state=directory
-- name: Insert HADOOP_OPTIONAL_TOOLS & HADOOP_OPTS in hadoop-env.sh
- blockinfile:
- path: "{{ hadoop_home }}/etc/hadoop/hadoop-env.sh"
- insertafter: EOF
- block: |
- export HADOOP_OPTIONAL_TOOLS=hadoop-azure
- export HADOOP_OPTS="-Dorg.wildfly.openssl.path=/usr/lib64 ${HADOOP_OPTS}"
- when: hadoop_major_version == '3' and use_adlsg2 == True
diff --git a/ansible/roles/hadoop-ha/tasks/start-dn.yml b/ansible/roles/hadoop-ha/tasks/start-dn.yml
deleted file mode 100644
index 0898199..0000000
--- a/ansible/roles/hadoop-ha/tasks/start-dn.yml
+++ /dev/null
@@ -1,25 +0,0 @@
-#
-### Licensed to the Apache Software Foundation (ASF) under one or more
-### contributor license agreements. See the NOTICE file distributed with
-### this work for additional information regarding copyright ownership.
-### The ASF licenses this file to You under the Apache License, Version 2.0
-### (the "License"); you may not use this file except in compliance with
-### the License. You may obtain a copy of the License at
-###
-### http://www.apache.org/licenses/LICENSE-2.0
-###
-### Unless required by applicable law or agreed to in writing, software
-### distributed under the License is distributed on an "AS IS" BASIS,
-### WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-### See the License for the specific language governing permissions and
-### limitations under the License.
-###
-##
-- name: Check if DataNode is running
- shell: jps | grep " DataNode" | grep -v grep
- ignore_errors: yes
- changed_when: false
- register: datanode_status
-- name: "start datanodes"
- command: "nohup {{ hadoop_home }}/sbin/hadoop-daemon.sh start datanode"
- when: datanode_status.rc == 1
diff --git a/ansible/roles/hadoop-ha/tasks/start-yarn.yml b/ansible/roles/hadoop-ha/tasks/start-yarn.yml
deleted file mode 100644
index 10c0c45..0000000
--- a/ansible/roles/hadoop-ha/tasks/start-yarn.yml
+++ /dev/null
@@ -1,28 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-- name: "start hadoop yarn 2.x"
- command: "{{ hadoop_home }}/sbin/start-yarn.sh"
- register: start_yarn_result
- changed_when: start_yarn_result.stdout | search("starting (:?resource|node)manager")
- when: hadoop_major_version == '2'
-- name: "start hadoop yarn 3.x"
- command: "nohup {{ hadoop_home }}/sbin/start-yarn.sh"
- register: start_yarn_result
- changed_when: start_yarn_result.rc == 0
- failed_when: start_yarn_result.rc >= 2
- when: hadoop_major_version == '3'
diff --git a/ansible/roles/hadoop-ha/templates/core-site.xml b/ansible/roles/hadoop-ha/templates/core-site.xml
deleted file mode 100644
index d717c5c..0000000
--- a/ansible/roles/hadoop-ha/templates/core-site.xml
+++ /dev/null
@@ -1,71 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
-<!--
-
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-
--->
-
-<!-- Put site-specific property overrides in this file. -->
-
-<configuration>
- <property>
- <name>fs.defaultFS</name>
- <value>{{ hdfs_root }}</value>
- </property>
- <property>
- <name>dfs.client.read.shortcircuit</name>
- <value>true</value>
- </property>
- <property>
- <name>dfs.domain.socket.path</name>
- <value>/var/lib/hadoop-hdfs/dn_socket</value>
- </property>
- <property>
- <name>ha.zookeeper.quorum</name>
- <value>{{ zookeeper_connect }}</value>
- </property>
-{% if use_adlsg2 == True %}
- <property>
- <name>fs.azure.account.auth.type</name>
- <value>OAuth</value>
- </property>
- <property>
- <name>fs.azure.account.oauth.provider.type</name>
- <value>org.apache.hadoop.fs.azurebfs.oauth2.MsiTokenProvider</value>
- </property>
- <property>
- <name>fs.azure.account.oauth2.msi.tenant</name>
- <value>{{ azure_tenant_id}}</value>
- </property>
- <property>
- <name>fs.azure.account.oauth2.client.id</name>
- <value>{{ azure_client_id }}</value>
- </property>
- <property>
- <name>fs.azure.use.upn</name>
- <value>true</value>
- </property>
- <property>
- <name>fs.azure.identity.transformer.service.principal.substitution.list</name>
- <value>*</value>
- </property>
- <property>
- <name>fs.azure.identity.transformer.service.principal.id</name>
- <value>{{ principal_id }}</value>
- </property>
-{% endif %}
-</configuration>
diff --git a/ansible/roles/hadoop-ha/templates/hadoop-metrics2.properties b/ansible/roles/hadoop-ha/templates/hadoop-metrics2.properties
deleted file mode 100644
index 40b4e66..0000000
--- a/ansible/roles/hadoop-ha/templates/hadoop-metrics2.properties
+++ /dev/null
@@ -1,29 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-{% if cluster_type == 'azure' %}
-*.sink.statsd.class=org.apache.hadoop.metrics2.sink.StatsDSink
-*.sink.statsd.period=60
-namenode.sink.statsd.server.host=127.0.0.1
-namenode.sink.statsd.server.port=8125
-namenode.sink.statsd.skip.hostname=true
-namenode.sink.statsd.service.name=NameNode
-datanode.sink.statsd.server.host=127.0.0.1
-datanode.sink.statsd.server.port=8125
-datanode.sink.statsd.skip.hostname=true
-datanode.sink.statsd.service.name=DataNode
-{% endif %}
diff --git a/ansible/roles/hadoop-ha/templates/hdfs-site.xml b/ansible/roles/hadoop-ha/templates/hdfs-site.xml
deleted file mode 100644
index b80b3d1..0000000
--- a/ansible/roles/hadoop-ha/templates/hdfs-site.xml
+++ /dev/null
@@ -1,117 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
-<!--
-
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-
--->
-<!--
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License. See accompanying LICENSE file.
--->
-
-<!-- Put site-specific property overrides in this file. -->
-
-<configuration>
- <property>
- <name>dfs.datanode.synconclose</name>
- <value>true</value>
- </property>
- <property>
- <name>dfs.namenode.name.dir</name>
- <value>{{ worker_data_dirs[0] }}/hadoop/name</value>
- </property>
- <property>
- <name>dfs.datanode.data.dir</name>
- <value>{% for dir in worker_data_dirs -%}
- {{ dir }}/hadoop/data
- {%- if not loop.last -%} , {%- endif -%}
- {%- endfor %}</value>
- </property>
- <property>
- <name>dfs.namenode.fs-limits.max-directory-items</name>
- <value>6400000</value>
- </property>
- <property>
- <name>dfs.client.read.shortcircuit</name>
- <value>true</value>
- </property>
- <property>
- <name>dfs.domain.socket.path</name>
- <value>/var/lib/hadoop-hdfs/dn_socket</value>
- </property>
- <property>
- <name>dfs.nameservices</name>
- <value>{{ nameservice_id }}</value>
- </property>
- <property>
- <name>dfs.ha.namenodes.{{ nameservice_id }}</name>
- <value>nn1,nn2</value>
- </property>
- <property>
- <name>dfs.namenode.rpc-address.{{ nameservice_id }}.nn1</name>
- <value>{{ groups['namenode'][0] }}:8020</value>
- </property>
- <property>
- <name>dfs.namenode.rpc-address.{{ nameservice_id }}.nn2</name>
- <value>{{ groups['namenode'][1] }}:8020</value>
- </property>
- <property>
- <name>dfs.namenode.http-address.{{ nameservice_id }}.nn1</name>
- <value>{{ groups['namenode'][0] }}:50070</value>
- </property>
- <property>
- <name>dfs.namenode.http-address.{{ nameservice_id }}.nn2</name>
- <value>{{ groups['namenode'][1] }}:50070</value>
- </property>
- <property>
- <name>dfs.namenode.https-address.{{ nameservice_id }}.nn1</name>
- <value>{{ groups['namenode'][0] }}:50071</value>
- </property>
- <property>
- <name>dfs.namenode.https-address.{{ nameservice_id }}.nn2</name>
- <value>{{ groups['namenode'][1] }}:50071</value>
- </property>
- <property>
- <name>dfs.namenode.shared.edits.dir</name>
- <value>qjournal://{{ journal_quorum }}/{{ nameservice_id }}</value>
- </property>
- <property>
- <name>dfs.client.failover.proxy.provider.{{ nameservice_id }}</name>
- <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
- </property>
- <property>
- <name>dfs.ha.fencing.methods</name>
- <value>shell(/usr/bin/true)</value>
- </property>
- <property>
- <name>dfs.journalnode.edits.dir</name>
- <value>{{ worker_data_dirs[0] }}/hadoop/journal</value>
- </property>
- <property>
- <name>dfs.ha.automatic-failover.enabled</name>
- <value>true</value>
- </property>
-</configuration>
diff --git a/ansible/roles/hadoop-ha/templates/mapred-site.xml b/ansible/roles/hadoop-ha/templates/mapred-site.xml
deleted file mode 100644
index c3def16..0000000
--- a/ansible/roles/hadoop-ha/templates/mapred-site.xml
+++ /dev/null
@@ -1,63 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
-<!--
-
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-
--->
-
-<!-- Put site-specific property overrides in this file. -->
-
-<configuration>
- <property>
- <name>mapreduce.framework.name</name>
- <value>yarn</value>
- </property>
- <property>
- <name>mapreduce.cluster.temp.dir</name>
- <value>{% for dir in worker_data_dirs -%}
- {{ dir }}/hadoop/mapred/temp
- {%- if not loop.last -%} , {%- endif -%}
- {%- endfor %}</value>
- </property>
- <property>
- <name>mapreduce.cluster.local.dir</name>
- <value>{% for dir in worker_data_dirs -%}
- {{ dir }}/hadoop/mapred/local
- {%- if not loop.last -%} , {%- endif -%}
- {%- endfor %}</value>
- </property>
-{% if hadoop_major_version == '3' %}
- <property>
- <name>yarn.app.mapreduce.am.env</name>
- <value>HADOOP_MAPRED_HOME={{ hadoop_home }}</value>
- </property>
- <property>
- <name>mapreduce.map.env</name>
- <value>HADOOP_MAPRED_HOME={{ hadoop_home }}</value>
- </property>
- <property>
- <name>mapreduce.reduce.env</name>
- <value>HADOOP_MAPRED_HOME={{ hadoop_home }}</value>
- </property>
-{% endif %}
-{% if use_adlsg2 == True %}
- <property>
- <name>mapreduce.application.classpath</name>
- <value>$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*,$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*,$HADOOP_MAPRED_HOME/share/hadoop/common/*,$HADOOP_MAPRED_HOME/share/hadoop/common/lib/*,$HADOOP_MAPRED_HOME/share/hadoop/yarn/*,$HADOOP_MAPRED_HOME/share/hadoop/yarn/lib/*,$HADOOP_MAPRED_HOME/share/hadoop/hdfs/*,$HADOOP_MAPRED_HOME/share/hadoop/hdfs/lib/*,$HADOOP_MAPRED_HOME/share/hadoop/tools/lib/*,${HADOOP_HOME}/share/hadoop/client/*</value>
- </property>
-{% endif %}
-</configuration>
diff --git a/ansible/roles/hadoop-ha/templates/slaves b/ansible/roles/hadoop-ha/templates/slaves
deleted file mode 100644
index cd3348c..0000000
--- a/ansible/roles/hadoop-ha/templates/slaves
+++ /dev/null
@@ -1,3 +0,0 @@
-{% for host in groups['workers'] %}
-{{ host }}
-{% endfor %}
diff --git a/ansible/roles/hadoop-ha/templates/workers b/ansible/roles/hadoop-ha/templates/workers
deleted file mode 100644
index cd3348c..0000000
--- a/ansible/roles/hadoop-ha/templates/workers
+++ /dev/null
@@ -1,3 +0,0 @@
-{% for host in groups['workers'] %}
-{{ host }}
-{% endfor %}
diff --git a/ansible/roles/hadoop-ha/templates/yarn-site.xml b/ansible/roles/hadoop-ha/templates/yarn-site.xml
deleted file mode 100644
index eb45896..0000000
--- a/ansible/roles/hadoop-ha/templates/yarn-site.xml
+++ /dev/null
@@ -1,102 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
-<!--
-
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-
--->
-<!--
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License. See accompanying LICENSE file.
--->
-
-<!-- Put site-specific property overrides in this file. -->
-
-<configuration>
- <property>
- <name>yarn.resourcemanager.hostname</name>
- <value>{{ groups['resourcemanager'][0] }}</value>
- </property>
- <property>
- <name>yarn.nodemanager.local-dirs</name>
- <value>{% for dir in worker_data_dirs -%}
- {{ dir }}/hadoop/yarn/local
- {%- if not loop.last -%} , {%- endif -%}
- {%- endfor %}</value>
- </property>
- <property>
- <name>yarn.nodemanager.log-dirs</name>
- <value>{{ worker_data_dirs[0] }}/hadoop/yarn/logs</value>
- </property>
- {% if 'spark' in groups %}
- <property>
- <name>yarn.nodemanager.aux-services</name>
- <value>mapreduce_shuffle,spark_shuffle</value>
- </property>
- <property>
- <name>yarn.nodemanager.aux-services.spark_shuffle.class</name>
- <value>org.apache.spark.network.yarn.YarnShuffleService</value>
- </property>
- {% else %}
- <property>
- <name>yarn.nodemanager.aux-services</name>
- <value>mapreduce_shuffle</value>
- </property>
- {% endif %}
- <property>
- <name>yarn.nodemanager.resource.memory-mb</name>
- <value>{{ yarn_nm_mem_mb }}</value>
- </property>
- <property>
- <name>yarn.scheduler.minimum-allocation-mb</name>
- <value>128</value>
- </property>
- <property>
- <name>yarn.nodemanager.vmem-check-enabled</name>
- <value>false</value>
- </property>
- <property>
- <name>yarn.nodemanager.vmem-pmem-ratio</name>
- <value>3.1</value>
- </property>
- <property>
- <name>yarn.nodemanager.delete.debug-delay-sec</name>
- <value>259200</value>
- </property>
- <property>
- <name>yarn.resourcemanager.scheduler.class</name>
- <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
- </property>
- <property>
- <name>twill.java.reserved.memory.mb</name>
- <value>{{ twill_reserve_mem_mb }}</value>
- </property>
- {% if use_adlsg2 == True %}
- <property>
- <name>yarn.application.classpath</name>
- <value>${HADOOP_HOME}/share/hadoop/tools/lib/*,${HADOOP_HOME}/share/hadoop/hdfs/lib/*,${HADOOP_HOME}/share/hadoop/common/lib/*,${HADOOP_HOME}/share/hadoop/yarn/*,${HADOOP_HOME}/share/hadoop/yarn/lib/*,${HADOOP_HOME}/share/hadoop/hdfs/*,${HADOOP_HOME}/share/hadoop/common/*,${HADOOP_HOME}/share/hadoop/mapreduce/*,${HADOOP_HOME}/share/hadoop/mapreduce/lib/*,${HADOOP_HOME}/share/hadoop/client/*</value>
- </property>
- {% endif %}
-</configuration>
diff --git a/ansible/roles/hadoop-ha/tasks/format-nn.yml b/ansible/roles/hadoop/tasks/format-nn.yml
similarity index 100%
rename from ansible/roles/hadoop-ha/tasks/format-nn.yml
rename to ansible/roles/hadoop/tasks/format-nn.yml
diff --git a/ansible/roles/hadoop-ha/tasks/format-zk.yml b/ansible/roles/hadoop/tasks/format-zk.yml
similarity index 100%
rename from ansible/roles/hadoop-ha/tasks/format-zk.yml
rename to ansible/roles/hadoop/tasks/format-zk.yml
diff --git a/ansible/roles/hadoop/tasks/start-hdfs.yml b/ansible/roles/hadoop/tasks/start-hdfs.yml
index a05a4f8..8c61b02 100644
--- a/ansible/roles/hadoop/tasks/start-hdfs.yml
+++ b/ansible/roles/hadoop/tasks/start-hdfs.yml
@@ -15,10 +15,6 @@
# limitations under the License.
#
-- name: "format namenode"
- command: "{{ hadoop_home }}/bin/hdfs namenode -format"
- args:
- creates: "{{ worker_data_dirs[0] }}/hadoop/name"
- name: "start hdfs"
command: "{{ hadoop_home }}/sbin/start-dfs.sh"
register: start_hdfs
diff --git a/ansible/roles/hadoop-ha/tasks/start-journal.yml b/ansible/roles/hadoop/tasks/start-journal.yml
similarity index 100%
rename from ansible/roles/hadoop-ha/tasks/start-journal.yml
rename to ansible/roles/hadoop/tasks/start-journal.yml
diff --git a/ansible/roles/hadoop-ha/tasks/start-nn1.yml b/ansible/roles/hadoop/tasks/start-nn1.yml
similarity index 100%
rename from ansible/roles/hadoop-ha/tasks/start-nn1.yml
rename to ansible/roles/hadoop/tasks/start-nn1.yml
diff --git a/ansible/roles/hadoop-ha/tasks/start-nn2.yml b/ansible/roles/hadoop/tasks/start-nn2.yml
similarity index 100%
rename from ansible/roles/hadoop-ha/tasks/start-nn2.yml
rename to ansible/roles/hadoop/tasks/start-nn2.yml
diff --git a/ansible/roles/hadoop-ha/tasks/start-zkfc.yml b/ansible/roles/hadoop/tasks/start-zkfc.yml
similarity index 100%
rename from ansible/roles/hadoop-ha/tasks/start-zkfc.yml
rename to ansible/roles/hadoop/tasks/start-zkfc.yml
diff --git a/ansible/roles/hadoop/templates/core-site.xml b/ansible/roles/hadoop/templates/core-site.xml
index c5f1597..85b88a0 100644
--- a/ansible/roles/hadoop/templates/core-site.xml
+++ b/ansible/roles/hadoop/templates/core-site.xml
@@ -21,8 +21,6 @@
<!-- Put site-specific property overrides in this file. -->
-<!-- when editing this file please consider if changes are also needed in roles/hadoop-ha/templates -->
-
<configuration>
<property>
<name>fs.defaultFS</name>
@@ -36,6 +34,12 @@
<name>dfs.domain.socket.path</name>
<value>/var/lib/hadoop-hdfs/dn_socket</value>
</property>
+{% if hdfs_ha == True %}
+ <property>
+ <name>ha.zookeeper.quorum</name>
+ <value>{{ zookeeper_connect }}</value>
+ </property>
+{% endif %}
{% if use_adlsg2 == True %}
<property>
<name>fs.azure.account.auth.type</name>
diff --git a/ansible/roles/hadoop/templates/hdfs-site.xml b/ansible/roles/hadoop/templates/hdfs-site.xml
index 557d7db..3877b82 100644
--- a/ansible/roles/hadoop/templates/hdfs-site.xml
+++ b/ansible/roles/hadoop/templates/hdfs-site.xml
@@ -21,8 +21,6 @@
<!-- Put site-specific property overrides in this file. -->
-<!-- when editing this file please consider if changes are also needed in roles/hadoop-ha/templates -->
-
<configuration>
<property>
<name>dfs.datanode.synconclose</name>
@@ -39,6 +37,7 @@
{%- if not loop.last -%} , {%- endif -%}
{%- endfor %}</value>
</property>
+{% if hdfs_ha == False %}
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>{{ groups['namenode'][0] }}:50090</value>
@@ -47,6 +46,60 @@
<name>dfs.namenode.secondary.https-address</name>
<value>{{ groups['namenode'][0] }}:50091</value>
</property>
+{% else %}
+ <property>
+ <name>dfs.nameservices</name>
+ <value>{{ nameservice_id }}</value>
+ </property>
+ <property>
+ <name>dfs.ha.namenodes.{{ nameservice_id }}</name>
+ <value>nn1,nn2</value>
+ </property>
+ <property>
+ <name>dfs.namenode.rpc-address.{{ nameservice_id }}.nn1</name>
+ <value>{{ groups['namenode'][0] }}:8020</value>
+ </property>
+ <property>
+ <name>dfs.namenode.rpc-address.{{ nameservice_id }}.nn2</name>
+ <value>{{ groups['namenode'][1] }}:8020</value>
+ </property>
+ <property>
+ <name>dfs.namenode.http-address.{{ nameservice_id }}.nn1</name>
+ <value>{{ groups['namenode'][0] }}:50070</value>
+ </property>
+ <property>
+ <name>dfs.namenode.http-address.{{ nameservice_id }}.nn2</name>
+ <value>{{ groups['namenode'][1] }}:50070</value>
+ </property>
+ <property>
+ <name>dfs.namenode.https-address.{{ nameservice_id }}.nn1</name>
+ <value>{{ groups['namenode'][0] }}:50071</value>
+ </property>
+ <property>
+ <name>dfs.namenode.https-address.{{ nameservice_id }}.nn2</name>
+ <value>{{ groups['namenode'][1] }}:50071</value>
+ </property>
+ <property>
+ <name>dfs.namenode.shared.edits.dir</name>
+ <value>qjournal://{{ journal_quorum }}/{{ nameservice_id }}</value>
+ </property>
+ <property>
+ <name>dfs.client.failover.proxy.provider.{{ nameservice_id }}</name>
+ <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
+ </property>
+ <property>
+ <name>dfs.ha.fencing.methods</name>
+ <value>shell(/usr/bin/true)</value>
+ </property>
+ <property>
+ <name>dfs.journalnode.edits.dir</name>
+ <value>{{ worker_data_dirs[0] }}/hadoop/journal</value>
+ </property>
+ <property>
+ <name>dfs.ha.automatic-failover.enabled</name>
+ <value>true</value>
+ </property>
+{% endif %}
<property>
<name>dfs.client.read.shortcircuit</name>
<value>true</value>
diff --git a/ansible/roles/hadoop/templates/mapred-site.xml b/ansible/roles/hadoop/templates/mapred-site.xml
index 7ecf751..90024ad 100644
--- a/ansible/roles/hadoop/templates/mapred-site.xml
+++ b/ansible/roles/hadoop/templates/mapred-site.xml
@@ -21,8 +21,6 @@
<!-- Put site-specific property overrides in this file. -->
-<!-- when editing this file please consider if changes are also needed in roles/hadoop-ha/templates -->
-
<configuration>
<property>
<name>mapreduce.framework.name</name>
diff --git a/ansible/roles/hadoop/templates/yarn-site.xml b/ansible/roles/hadoop/templates/yarn-site.xml
index 847f98b..afce378 100644
--- a/ansible/roles/hadoop/templates/yarn-site.xml
+++ b/ansible/roles/hadoop/templates/yarn-site.xml
@@ -21,8 +21,6 @@
<!-- Put site-specific property overrides in this file. -->
-<!-- when editing this file please consider if changes are also needed in roles/hadoop-ha/templates -->
-
<configuration>
<property>
<name>yarn.resourcemanager.hostname</name>
diff --git a/lib/muchos/existing.py b/lib/muchos/existing.py
index c886531..501497d 100644
--- a/lib/muchos/existing.py
+++ b/lib/muchos/existing.py
@@ -47,10 +47,7 @@
print("- import_playbook: common.yml", file=site_file)
print("- import_playbook: zookeeper.yml", file=site_file)
- if config.get("general","hdfs_ha") == 'True':
- print("- import_playbook: hadoop-ha.yml", file=site_file)
- else:
- print("- import_playbook: hadoop.yml", file=site_file)
+ print("- import_playbook: hadoop.yml", file=site_file)
if config.has_service("spark"):
print("- import_playbook: spark.yml", file=site_file)