#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#
# To install Terraform, download the appropriate version from https://www.terraform.io/downloads.html
# and copy the binary to /usr/local/bin or some other location on your PATH.
#
# Run "terraform init" in this directory to download the plugins that Terraform will need to run
# this plan.
#
# Run "terraform plan" to see the changes that would be made if you applied this plan.
#
# Run "terraform apply" to see the changes that would be made and to optionally apply the plan.
#
#
# This Terraform configuration does the following:
#
# 1. Create a virtual network, subnet, and network security group in Azure.
#
# 2. Create a NIC (attached to the security group) and VM in Azure for the manager.
#
# 3. Create a NIC (attached to the security group) and VM in Azure for each worker node.
#
# 4. Provision each VM with a customized cloud-init script and wait for that script to complete.
#
# 5. Upload config files and install the software on each node.
#
################################
# Core Terraform Configuration #
################################
terraform {
required_version = ">= 1.1.0"
required_providers {
azurerm = {
source = "hashicorp/azurerm"
version = "~> 3.0"
}
}
backend "azurerm" {
resource_group_name = "accumulo-testing-tf-state"
storage_account_name = "accumulotesttfsteast"
container_name = "accumulo-testing-tf-state"
key = "accumulo-testing/terraform.tfstate"
}
}
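# Note: the backend storage referenced above must exist before "terraform init" is run.
# As a sketch (assuming the East US region suggested by the storage account name), it could
# be created with the Azure CLI:
#
#   az group create --name accumulo-testing-tf-state --location eastus
#   az storage account create --name accumulotesttfsteast --resource-group accumulo-testing-tf-state --sku Standard_LRS
#   az storage container create --name accumulo-testing-tf-state --account-name accumulotesttfsteast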
provider "azurerm" {
features {}
}
locals {
ssh_keys = toset(concat(var.authorized_ssh_keys, [for k in var.authorized_ssh_key_files : file(k)]))
# Resource group name and location.
# These are pulled either from the resource group that was created (if create_resource_group is true)
# or from the resource group that already exists (if create_resource_group is false). Keeping
# references to the resource or data object rather than just using var.resource_group_name
# allows Terraform to build the dependency graph automatically and to wait for the resource group
# to be created if necessary.
rg_name = var.create_resource_group ? azurerm_resource_group.rg[0].name : data.azurerm_resource_group.existing_rg[0].name
location = var.create_resource_group ? azurerm_resource_group.rg[0].location : data.azurerm_resource_group.existing_rg[0].location
# Save the public/private IP addresses of the VMs to pass to sub-modules.
manager_ip = azurerm_linux_virtual_machine.manager.public_ip_address
worker_ips = azurerm_linux_virtual_machine.workers[*].public_ip_address
manager_private_ip = azurerm_linux_virtual_machine.manager.private_ip_address
worker_private_ips = azurerm_linux_virtual_machine.workers[*].private_ip_address
# This script is run on all nodes to ensure a "ready" state,
# i.e., that cloud-init has finished and the node is ready for further provisioning.
ready_script = [
"echo Waiting for cloud init to complete...",
"sudo cloud-init status --wait > /dev/null",
"sudo cloud-init status --long"
]
}
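# The SSH keys merged into local.ssh_keys above can be supplied either inline or as public key
# files, e.g. in a .tfvars file (illustrative values; file() does not expand "~", so use a full path):
#
#   authorized_ssh_keys      = ["ssh-ed25519 AAAAC3... user@example.com"]
#   authorized_ssh_key_files = ["/home/myuser/.ssh/id_ed25519.pub"]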
data "azurerm_resource_group" "existing_rg" {
count = var.create_resource_group ? 0 : 1
name = var.resource_group_name
}
# Place all resources in a resource group
resource "azurerm_resource_group" "rg" {
count = var.create_resource_group ? 1 : 0
name = var.resource_group_name
location = var.location
}
#########################
# Network Configuration #
#########################
# Creates a virtual network for use by this cluster.
resource "azurerm_virtual_network" "accumulo_vnet" {
name = "${var.resource_name_prefix}-vnet"
resource_group_name = local.rg_name
location = local.location
address_space = var.network_address_space
}
# Create a subnet for this cluster. Give storage a service endpoint
# so that we'll be able to create an NFS share.
resource "azurerm_subnet" "internal" {
name = "${var.resource_name_prefix}-subnet"
resource_group_name = local.rg_name
virtual_network_name = azurerm_virtual_network.accumulo_vnet.name
address_prefixes = var.subnet_address_prefixes
}
# Create a Network Security Group that only allows SSH (22)
# traffic from the internet and denies everything else.
resource "azurerm_network_security_group" "nsg" {
name = "${var.resource_name_prefix}-nsg"
location = local.location
resource_group_name = local.rg_name
security_rule {
name = "allow-ssh"
priority = 1001
direction = "Inbound"
access = "Allow"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = "22"
source_address_prefix = "*"
destination_address_prefix = "*"
}
}
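# To limit SSH access to a specific network rather than the whole internet, the
# source_address_prefix above could be set to a CIDR block, e.g. "203.0.113.0/24"
# (an illustrative documentation range), instead of "*".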
####################
# VM Configuration #
####################
# Generate cloud-init data to use when creating nodes.
module "cloud_init_config" {
source = "../modules/cloud-init-config"
lvm_mount_point = var.managed_disk_configuration != null ? var.managed_disk_configuration.mount_point : null
lvm_disk_count = var.managed_disk_configuration != null ? var.managed_disk_configuration.disk_count : null
software_root = var.software_root
zookeeper_dir = var.zookeeper_dir
hadoop_dir = var.hadoop_dir
accumulo_dir = var.accumulo_dir
maven_version = var.maven_version
zookeeper_version = var.zookeeper_version
hadoop_version = var.hadoop_version
accumulo_branch_name = var.accumulo_branch_name
accumulo_version = var.accumulo_version
authorized_ssh_keys = local.ssh_keys[*]
os_distro = var.os_distro
os_version = var.os_version
cluster_type = "azure"
optional_cloudinit_config = var.optional_cloudinit_config
cloudinit_merge_type = var.cloudinit_merge_type
}
# Create a static public IP address for the manager node.
resource "azurerm_public_ip" "manager" {
name = "${var.resource_name_prefix}-manager-ip"
resource_group_name = local.rg_name
location = local.location
allocation_method = "Static"
}
# Create a NIC for the manager node.
resource "azurerm_network_interface" "manager" {
name = "${var.resource_name_prefix}-manager-nic"
location = local.location
resource_group_name = local.rg_name
enable_accelerated_networking = true
ip_configuration {
name = "internal"
subnet_id = azurerm_subnet.internal.id
public_ip_address_id = azurerm_public_ip.manager.id
private_ip_address_allocation = "Dynamic"
}
}
# Associate the manager node's NIC with the network security group.
resource "azurerm_network_interface_security_group_association" "manager" {
network_interface_id = azurerm_network_interface.manager.id
network_security_group_id = azurerm_network_security_group.nsg.id
}
# Create a static public IP for each of the worker nodes.
resource "azurerm_public_ip" "workers" {
count = var.worker_count
name = "${var.resource_name_prefix}-worker${count.index}-ip"
resource_group_name = local.rg_name
location = local.location
allocation_method = "Static"
}
# Create a NIC for each of the worker nodes.
resource "azurerm_network_interface" "workers" {
count = var.worker_count
name = "${var.resource_name_prefix}-worker${count.index}-nic"
location = local.location
resource_group_name = local.rg_name
enable_accelerated_networking = true
ip_configuration {
name = "internal"
subnet_id = azurerm_subnet.internal.id
public_ip_address_id = azurerm_public_ip.workers[count.index].id
private_ip_address_allocation = "Dynamic"
}
}
# Associate each worker node's NIC with the network security group.
resource "azurerm_network_interface_security_group_association" "workers" {
count = var.worker_count
network_interface_id = azurerm_network_interface.workers[count.index].id
network_security_group_id = azurerm_network_security_group.nsg.id
}
# Create the manager VM.
# Add a login user that can SSH to the VM using any of the supplied SSH keys.
resource "azurerm_linux_virtual_machine" "manager" {
name = "${var.resource_name_prefix}-manager"
resource_group_name = local.rg_name
location = local.location
size = var.vm_sku
computer_name = "manager"
admin_username = var.admin_username
custom_data = base64encode(module.cloud_init_config.cloud_init_data)
disable_password_authentication = true
network_interface_ids = [
azurerm_network_interface.manager.id,
]
dynamic "admin_ssh_key" {
for_each = local.ssh_keys
content {
username = var.admin_username
public_key = admin_ssh_key.value
}
}
os_disk {
storage_account_type = var.os_disk_type
caching = var.os_disk_caching
disk_size_gb = var.os_disk_size_gb
}
source_image_reference {
publisher = var.vm_image.publisher
offer = var.vm_image.offer
sku = var.vm_image.sku
version = var.vm_image.version
}
}
# Create and attach managed disks to the manager VM.
resource "azurerm_managed_disk" "manager_managed_disk" {
count = var.managed_disk_configuration != null ? var.managed_disk_configuration.disk_count : 0
name = format("%s_disk%02d", azurerm_linux_virtual_machine.manager.name, count.index)
resource_group_name = local.rg_name
location = local.location
storage_account_type = var.managed_disk_configuration.storage_account_type
disk_size_gb = var.managed_disk_configuration.disk_size_gb
create_option = "Empty"
}
resource "azurerm_virtual_machine_data_disk_attachment" "manager_managed_disk_attachment" {
count = var.managed_disk_configuration != null ? var.managed_disk_configuration.disk_count : 0
managed_disk_id = azurerm_managed_disk.manager_managed_disk[count.index].id
virtual_machine_id = azurerm_linux_virtual_machine.manager.id
lun = 10 + count.index
caching = "ReadOnly"
}
# Wait for cloud-init to complete on the manager VM.
# This is done here rather than in the VM resource because the cloud-init script
# waits for managed disks to be attached (if used), but the managed disks cannot
# be attached until the VM is created, so we'd have a deadlock.
resource "null_resource" "wait_for_manager_cloud_init" {
provisioner "remote-exec" {
inline = local.ready_script
connection {
type = "ssh"
user = azurerm_linux_virtual_machine.manager.admin_username
host = azurerm_linux_virtual_machine.manager.public_ip_address
}
}
depends_on = [
azurerm_virtual_machine_data_disk_attachment.manager_managed_disk_attachment
]
}
# Create the worker VMs.
# Add a login user that can SSH to the VMs using any of the supplied SSH keys.
resource "azurerm_linux_virtual_machine" "workers" {
count = var.worker_count
name = "${var.resource_name_prefix}-worker${count.index}"
resource_group_name = local.rg_name
location = local.location
size = var.vm_sku
computer_name = "worker${count.index}"
admin_username = var.admin_username
custom_data = base64encode(module.cloud_init_config.cloud_init_data)
disable_password_authentication = true
network_interface_ids = [
azurerm_network_interface.workers[count.index].id
]
dynamic "admin_ssh_key" {
for_each = local.ssh_keys
content {
username = var.admin_username
public_key = admin_ssh_key.value
}
}
os_disk {
storage_account_type = var.os_disk_type
caching = var.os_disk_caching
disk_size_gb = var.os_disk_size_gb
}
source_image_reference {
publisher = var.vm_image.publisher
offer = var.vm_image.offer
sku = var.vm_image.sku
version = var.vm_image.version
}
}
# Create and attach managed disks to the worker VMs.
locals {
worker_disks = var.managed_disk_configuration == null ? [] : flatten([
for vm_num, vm in azurerm_linux_virtual_machine.workers : [
for disk_num in range(var.managed_disk_configuration.disk_count) : {
datadisk_name = format("%s_disk%02d", vm.name, disk_num)
lun = 10 + disk_num
worker_num = vm_num
}
]
])
}
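# For example, with two workers and managed_disk_configuration.disk_count = 2, local.worker_disks
# expands to the following (where <prefix> is var.resource_name_prefix):
#   { datadisk_name = "<prefix>-worker0_disk00", lun = 10, worker_num = 0 }
#   { datadisk_name = "<prefix>-worker0_disk01", lun = 11, worker_num = 0 }
#   { datadisk_name = "<prefix>-worker1_disk00", lun = 10, worker_num = 1 }
#   { datadisk_name = "<prefix>-worker1_disk01", lun = 11, worker_num = 1 }
# Each entry produces one managed disk below, attached to the corresponding worker VM.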
resource "azurerm_managed_disk" "worker_managed_disk" {
count = length(local.worker_disks)
name = local.worker_disks[count.index].datadisk_name
resource_group_name = local.rg_name
location = local.location
storage_account_type = var.managed_disk_configuration.storage_account_type
disk_size_gb = var.managed_disk_configuration.disk_size_gb
create_option = "Empty"
}
resource "azurerm_virtual_machine_data_disk_attachment" "worker_managed_disk_attachment" {
count = length(local.worker_disks)
managed_disk_id = azurerm_managed_disk.worker_managed_disk[count.index].id
virtual_machine_id = azurerm_linux_virtual_machine.workers[local.worker_disks[count.index].worker_num].id
lun = local.worker_disks[count.index].lun
caching = "ReadOnly"
}
# Wait for cloud-init to complete on the worker VMs.
# This is done here rather than in the VM resources because the cloud-init script
# waits for managed disks to be attached (if used), but the managed disks cannot
# be attached until the VMs are created, so we'd have a deadlock.
resource "null_resource" "wait_for_workers_cloud_init" {
count = length(azurerm_linux_virtual_machine.workers)
provisioner "remote-exec" {
inline = local.ready_script
connection {
type = "ssh"
user = azurerm_linux_virtual_machine.workers[count.index].admin_username
host = azurerm_linux_virtual_machine.workers[count.index].public_ip_address
}
}
depends_on = [
azurerm_virtual_machine_data_disk_attachment.worker_managed_disk_attachment
]
}
#########################
# Cluster Configuration #
#########################
#
# This section creates the ZooKeeper, Hadoop, and Accumulo configuration files
# using templates in the templates directory, the IP addresses of the Azure VMs
# created above, and the input variables.
#
module "config_files" {
source = "../modules/config-files"
os_distro = var.os_distro
software_root = var.software_root
upload_host = local.manager_ip
manager_ip = local.manager_private_ip
worker_ips = local.worker_private_ips
zookeeper_dir = var.zookeeper_dir
hadoop_dir = var.hadoop_dir
accumulo_dir = var.accumulo_dir
maven_version = var.maven_version
zookeeper_version = var.zookeeper_version
hadoop_version = var.hadoop_version
accumulo_version = var.accumulo_version
accumulo_repo = var.accumulo_repo
accumulo_branch_name = var.accumulo_branch_name
accumulo_testing_repo = var.accumulo_testing_repo
accumulo_testing_branch_name = var.accumulo_testing_branch_name
accumulo_instance_name = var.accumulo_instance_name
accumulo_root_password = var.accumulo_root_password
depends_on = [
null_resource.wait_for_manager_cloud_init
]
}
#
# This module uploads any local tarballs to the manager VM and
# stores them on the NFS share.
#
module "upload_software" {
source = "../modules/upload-software"
local_sources_dir = var.local_sources_dir
upload_dir = var.software_root
upload_host = local.manager_ip
depends_on = [
null_resource.wait_for_manager_cloud_init
]
}
#
# This section performs final configuration of the Accumulo cluster.
#
module "configure_nodes" {
source = "../modules/configure-nodes"
software_root = var.software_root
upload_host = local.manager_ip
accumulo_instance_name = module.config_files.accumulo_instance_name
accumulo_root_password = module.config_files.accumulo_root_password
depends_on = [
module.upload_software,
module.config_files,
null_resource.wait_for_workers_cloud_init
]
}
###########
# Outputs #
###########
output "manager_ip" {
value = local.manager_ip
description = "The public IP address of the manager VM."
}
output "worker_ips" {
value = local.worker_ips
description = "The public IP addresses of the worker VMs."
}
output "accumulo_root_password" {
value = module.config_files.accumulo_root_password
description = "The user-supplied or automatically generated Accumulo root user password."
}
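# After "terraform apply" completes, these output values can be read back with, for example:
#
#   terraform output manager_ip
#   terraform output -raw accumulo_root_password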