#cloud-config
#
# Do not write SSH key fingerprint information to the log.
#
no_ssh_fingerprints: true
# Groups that must exist before the users below are created:
# hadoop (primary group of the hadoop user) and docker.
groups:
  - hadoop
  - docker
# Put the image's automatically created default user in the docker group.
system_info:
  default_user:
    groups: [docker]
# Create users
users:
  # Keep the image's default user in addition to the hadoop account.
  - default
  - name: hadoop
    homedir: /home/hadoop
    no_create_home: false
    primary_group: hadoop
    groups: docker
    shell: /bin/bash
    sudo: ALL=(ALL) NOPASSWD:ALL
    #
    # NOTE: The ssh_authorized_keys section of the hadoop user configuration should contain
    #       the public key for every developer that is going to log into the nodes. This
    #       allows the developer to log into the nodes using the command: ssh hadoop@<ip>
    #
    # Each key is emitted through jsonencode so it renders as a double-quoted
    # scalar. A plain (unquoted) scalar would break or be truncated when a key's
    # comment or options contain ": ", " #" or a leading YAML indicator.
    #
    ssh_authorized_keys:
%{ for key in authorized_ssh_keys ~}
      - ${jsonencode(key)}
%{ endfor ~}
| |
%{ if os_distro == "centos" || os_distro == "rhel" ~}
# CentOS/RHEL only: register the Docker CE stable repository. It is left
# disabled here (enabled: false), with GPG checking turned on.
yum_repos:
  docker:
    name: Docker CE Stable - $basearch
    baseurl: https://download.docker.com/linux/centos/$releasever/$basearch/stable
    enabled: false
    gpgcheck: true
    gpgkey: https://download.docker.com/linux/centos/gpg
%{ endif ~}
%{ if os_distro == "ubuntu" ~}
# Ubuntu only: register the Docker CE apt source, identified by its signing key id.
apt:
  sources:
    docker.list:
      source: deb [arch=amd64] https://download.docker.com/linux/ubuntu $RELEASE stable
      keyid: 9DC858229FC7DD38854AE2D88D81803C0EBFCD88
%{ endif ~}
| |
#
# Packages installed through the distro package manager (yum or apt).
# The list is assembled per distro; make, openssl and wget are common to all.
#
packages:
%{ if os_distro == "centos" || os_distro == "rhel" ~}
  # EPEL: a distro package on CentOS, an RPM fetched by URL on RHEL.
%{ if os_distro == "centos" ~}
  - epel-release
%{ endif ~}
%{ if os_distro == "rhel" ~}
  - https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm
%{ endif ~}
  - yum-utils
  - gcc-c++
  - java-11-openjdk-devel
  # The Java 17 package is only requested when centos7 is "false".
%{ if centos7 == "false" ~}
  - java-17-openjdk-devel
%{ endif ~}
  - git
%{ endif ~}
%{ if os_distro == "ubuntu" ~}
  - net-tools
  - g++
  - openjdk-11-jdk-headless
  - openjdk-17-jdk
  - pdsh
  - pssh
%{ endif ~}
  - make
  - openssl
  - wget
| |
# Shell commands run in the order listed.
runcmd:
%{ if cluster_type == "azure" && lvm_mount_point != "" ~}
  # Azure with an LVM mount point configured: run the data-disk helper script
  # that write_files installs under /usr/local/bin.
  # NOTE(review): the owner argument is left in "hadoop.hadoop" form because the
  # helper script is not visible here - confirm it accepts "hadoop:hadoop"
  # before changing it.
  - /usr/local/bin/format-lvm-data-disk.sh ${lvm_disk_count} ${lvm_mount_point} hadoop.hadoop
%{ endif ~}
  # Create the software directories and hand them to the hadoop user. The
  # owner:group separator is a colon; the dot form is a deprecated extension.
  - mkdir -p ${software_root} ${zookeeper_dir} ${hadoop_dir} ${accumulo_dir}
  - chown hadoop:hadoop ${software_root} ${zookeeper_dir} ${hadoop_dir} ${accumulo_dir}
%{ if os_distro == "ubuntu" ~}
  # Use bash instead of dash for the default shell. The symlink is created
  # under a temporary name and then moved over /bin/sh.
  - ln -s bash /bin/sh.bash
  - mv /bin/sh.bash /bin/sh
%{ endif ~}
%{ if os_distro == "centos" || os_distro == "rhel" ~}
  # This has to be done here vs in the packages section because
  # we install the epel-release package there and can't update
  # the yum repo in the middle to make it know about pdsh-mod-genders
  - yum -y update
  - yum -y install pdsh-mod-genders pssh
%{ if os_distro == "rhel" ~}
  - yum -y install pdsh-rcmd-ssh
%{ endif ~}
%{ if centos7 == "true" ~}
  # When centos7 is "true" no Java 17 package is requested, so unpack a
  # Temurin 17 tarball into the JVM directory instead. mkdir -p tolerates a
  # pre-existing directory and a missing parent.
  - wget -O /tmp/jdk17.tar.gz https://github.com/adoptium/temurin17-binaries/releases/download/jdk-17.0.9%2B9/OpenJDK17U-jdk_x64_linux_hotspot_17.0.9_9.tar.gz
  - mkdir -p /usr/lib/jvm/java-17-openjdk
  - tar zxf /tmp/jdk17.tar.gz --strip-components=1 -C /usr/lib/jvm/java-17-openjdk
%{ endif ~}
%{ endif ~}
  # Set swappiness to 0 on the running kernel, then load the sysctl settings file.
  - sysctl -w vm.swappiness=0
  - sysctl -p
  # Move Hadoop ssh files in place. We couldn't do this directly in write_files
  # because some distros with cloud-init have the write_files module run before
  # the users are created. Make sure the target directory exists first so the
  # move does not fail when the hadoop user has no ~/.ssh yet.
  - mkdir -p /home/hadoop/.ssh
  - chmod 700 /home/hadoop/.ssh
  - mv /run/hadoop_ssh/* /home/hadoop/.ssh
  - rm -rf /run/hadoop_ssh
  - chown -R hadoop:hadoop /home/hadoop/.ssh
| |
#
# Files written to the filesystem. Some are used where they land; others are
# staged and later moved into place or invoked by runcmd.
#
write_files:
  # Set up PDSH to skip strict host key checking. Also, on ubuntu, we need
  # to set ssh as the default method for the rcmd module.
%{ if os_distro == "ubuntu" ~}
  - path: /etc/pdsh/rcmd_default
    permissions: '0644'
    content: |
      ssh
%{ endif ~}
  - path: /etc/profile.d/pdsh.sh
    permissions: '0755'
    content: |
      export PDSH_SSH_ARGS_APPEND="-o StrictHostKeyChecking=no"
  # Increase open files limits for the Hadoop user (appended to the existing file)
  - path: /etc/security/limits.conf
    append: true
    content: |
      hadoop soft nofile 4096
      hadoop hard nofile 65535
  # Set up files for Hadoop's home. Due to a cloud-init issue with some
  # cloud provider images, the write_files module will run before users
  # are created, so we cannot change ownership to hadoop here. We must
  # do that as a runcmd, which we know runs later. The files are therefore
  # staged under /run/hadoop_ssh.
  - path: /run/hadoop_ssh/config
    defer: true
    permissions: '0600'
    content: |
      Host *
        Compression yes
        StrictHostKeyChecking no
  # NOTE(review): the key values below are interpolated as-is; presumably the
  # caller pre-indents multi-line values to match this block scalar - confirm.
  - path: /run/hadoop_ssh/id_rsa
    defer: true
    permissions: '0600'
    content: |
      ${hadoop_private_key}
  - path: /run/hadoop_ssh/id_rsa.pub
    defer: true
    permissions: '0644'
    content: |
      ${hadoop_public_key}
  # Install some utility scripts. Their contents are read from files_path and
  # indented by 6 to sit inside the block scalar.
  - path: /usr/local/bin/update-hosts-genders.sh
    permissions: '0755'
    content: |
      ${indent(6, file("${files_path}/update-hosts-genders.sh"))}
%{ if cluster_type == "azure" ~}
  - path: /usr/local/bin/format-lvm-data-disk.sh
    permissions: '0755'
    content: |
      ${indent(6, file("${files_path}/azure-format-lvm-data-disk.sh"))}
%{ endif ~}