blob: 8a0e66046708ffe4484cf44c4921e487dc5e17d7 [file] [log] [blame]
---
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
# These Ansible tasks only run on the client machine where Muchos runs.
# At a high level, the various sections in this file do the following:
# 1. Create an Azure ADLS Gen2 storage account.
# 2. Create User Assigned Identity.
# 3. Assign roles to storage accounts.
# 4. Create filesystem / container in storage accounts.
# 5. Update principal_id, azure_tenant_id, azure_client_id and instance_volumes_preferred in muchos.props.
# 6. Assign User Assigned Identity to VMSS.
# Derive a 16-character, lowercase, alphanumeric prefix for the storage account
# names from the concatenation of resource_group, vmss_name and location.
- name: Generate MD5 checksum based on resource_group name, vmss_name and location
  # The concatenated value is passed through the quote filter so characters
  # that are special to the shell cannot break or alter the command.
  shell: >-
    set -o pipefail &&
    echo -n {{ (resource_group + vmss_name + location) | quote }}
    | md5sum | tr -cd "[:alnum:]" | cut -c 1-16 | tr '[:upper:]' '[:lower:]'
  args:
    executable: bash
  register: StorageAccountMD5
  # The pipeline only computes a value; it never modifies the machine.
  changed_when: false
# Build the base storage account name: the checksum prefix followed by three
# numbers drawn with the random filter, seeded by resource_group, vmss_name
# and location respectively.
- name: Generate random names for storage account names
  set_fact:
    StorageAccountName: >-
      {{ StorageAccountMD5.stdout
      ~ (99 | random(seed=resource_group))
      ~ (99 | random(seed=vmss_name))
      ~ (9 | random(seed=location)) }}
# Both candidate volume lists start out empty; later tasks append to them.
- name: Initialize instance variables
  set_fact:
    InstanceVolumesManual: []
    InstanceVolumesAuto: []
# instance_volumes_input is "<auto part>|<manual part>". Abort when both parts
# are non-empty, since only one mode may be used at a time.
- name: Validate instance_volumes_input
  fail:
    msg: "Variable instance_volumes_input incorrectly specified, Both Manual and Auto cannot be specified at same time"
  when:
    - instance_volumes_input.split('|')[0].split(',') != ['']
    - instance_volumes_input.split('|')[1].split(',') != ['']
# Pick whichever side of the '|' separator is populated: the second (manual)
# part when the first (auto) part is empty, otherwise the first part.
- name: Assign manual or auto-generated volumes
  set_fact:
    InstanceVolumesTemp: >-
      {{ instance_volumes_input.split('|')[1].split(',') | list
      if instance_volumes_input.split('|')[0].split(',') == ['']
      else instance_volumes_input.split('|')[0].split(',') | list }}
# In auto mode the first element of InstanceVolumesTemp is the number of
# storage accounts; in manual mode fall back to '1' so the sequence in the
# next task stays valid.
- name: Retrieve sequence end number to get the number of storage accounts
  set_fact:
    InstanceVolumesEndSequence: >-
      {{ InstanceVolumesTemp[0] | int
      if instance_volumes_input.split('|')[0].split(',') != ['']
      else '1' }}
# Auto mode only: append one abfss:// URI per sequence number. The sequence
# number is suffixed to StorageAccountName and InstanceVolumesTemp[1] supplies
# the part of the host name after the first '.'.
- name: Generate names for Storage Accounts
  set_fact:
    InstanceVolumesAuto: "{{ InstanceVolumesAuto + ['abfss://accumulodata@' + StorageAccountName + item + '.' + InstanceVolumesTemp[1] + '/accumulo'] }}"
  when: InstanceVolumesTemp[0]|int != 0
  with_sequence: start=1 end={{ InstanceVolumesEndSequence|int }}
# Manual mode only: keep every entry whose scheme is abfss.
- name: Retrieve ABFSS values when specified manually
  set_fact:
    InstanceVolumesManual: "{{ InstanceVolumesManual + [item] }}"
  loop: "{{ InstanceVolumesTemp }}"
  when:
    - item.split('://')[0] == 'abfss'
    - instance_volumes_input.split('|')[0].split(',') == ['']
# Final list of instance volumes used by the remaining tasks: the generated
# list when the auto part of instance_volumes_input is populated, otherwise
# the manually supplied list.
- name: Assign variables for auto-generation or manual for storage account creation
  set_fact:
    InstanceVolumes: >-
      {{ InstanceVolumesAuto
      if instance_volumes_input.split('|')[0].split(',') != ['']
      else InstanceVolumesManual }}
# Write the comma-joined volume list into muchos.props, replacing the existing
# instance_volumes_preferred line whether or not it is commented out with '#'.
- name: Update instance_volumes_preferred in muchos.props
  lineinfile:
    line: "instance_volumes_preferred = {{ InstanceVolumes|join(',') }}"
    regexp: '^instance_volumes_preferred\s*=\s*|^[#]instance_volumes_preferred\s*=\s*'
    path: "{{ deploy_path }}/conf/muchos.props"
# Create one storage account per instance volume. The account name is the text
# between '@' and the first '.' of each volume URI.
# The result is not registered because storage values are not visible
# immediately.
- name: Create ADLS Gen2 storage account using REST API
  azure_rm_resource:
    state: present
    idempotency: true
    api_version: '2019-04-01'
    resource_group: "{{ resource_group }}"
    provider: Storage
    resource_type: storageAccounts
    resource_name: "{{ item.split('@')[1].split('.')[0] }}"
    body:
      kind: StorageV2
      location: "{{ location }}"
      sku:
        name: "{{ adls_storage_type }}"
      properties:
        isHnsEnabled: true
  loop: "{{ InstanceVolumes }}"
# Create the user assigned identity. When user_assigned_identity is empty the
# name defaults to vmss_name with the suffix '-ua-msi'.
# The result is not registered because user identity values are not visible
# immediately.
- name: Create User Assigned Identity
  azure_rm_resource:
    state: present
    idempotency: true
    api_version: '2018-11-30'
    resource_group: "{{ resource_group }}"
    provider: ManagedIdentity
    resource_type: userAssignedIdentities
    resource_name: "{{ user_assigned_identity if user_assigned_identity else vmss_name + '-ua-msi' }}"
    body:
      location: "{{ location }}"
# Retrieving facts about User Assigned Identity.
# Identity values are not visible immediately after creation, so poll until
# the response carries a non-empty principalId.
- name: Get facts for User Assigned Identity
  azure_rm_resource_info:
    resource_group: "{{ resource_group }}"
    provider: ManagedIdentity
    resource_type: userAssignedIdentities
    resource_name: "{{ user_assigned_identity if user_assigned_identity else vmss_name + '-ua-msi' }}"
    api_version: '2018-11-30'
  register: UserAssignedIdentityInfo
  retries: 20
  delay: 15
  # join('') always returns a string, so testing it with "is defined" is always
  # true and never triggers a retry; require a non-empty value instead.
  until: UserAssignedIdentityInfo.response | map(attribute='properties') | map(attribute='principalId') | join('') | length > 0
# Record the identity's principalId in muchos.props, replacing the existing
# principal_id line whether or not it is commented out with '#'.
- name: Update principal_id in muchos.props
  lineinfile:
    line: "principal_id = {{ UserAssignedIdentityInfo.response|map(attribute='properties')|map(attribute='principalId')|join('') }}"
    regexp: '^principal_id\s*=\s*|^[#]principal_id\s*=\s*'
    path: "{{ deploy_path }}/conf/muchos.props"
# This will be used to assign the MSI for VMSS
# Each identity id in the response is rewritten by regex_replace into the text
# {"<id>":{}}; the resulting list is later joined and passed as the
# userAssignedIdentities value of the VMSS request body.
# NOTE(review): the replacement string is escaped for YAML double quotes and
# then read by Jinja; do not change the quoting style without re-testing.
- name: Format User Assigned Identity for API
  set_fact:
    UserAssignedIdentityArr: "{{ UserAssignedIdentityInfo.response|default({})|map(attribute='id')|map('regex_replace','^(.*)$','{\"\\1\":{}}')|list }}" # noqa 206
# Look up the role definition whose id is used for the role assignments below.
- name: Get role definition id for "Storage Blob Data Owner"
  azure_rm_resource_info:
    api_version: '2015-07-01'
    resource_group: "{{ resource_group }}"
    provider: Authorization
    resource_type: roleDefinitions
    resource_name: b7e6dc6d-f1e8-4753-8033-0f276bb0955b
  register: RoleDefinitionInfo
# Poll each storage account (name taken from between '@' and the first '.' of
# the volume URI) until the query returns at least one non-empty id.
- name: Check if the storage accounts are visible
  azure_rm_storageaccount_info:
    name: "{{ item.split('@')[1].split('.')[0] }}"
    resource_group: "{{ resource_group }}"
  loop: "{{ InstanceVolumes }}"
  register: StorageAccountsInfo
  until: StorageAccountsInfo.storageaccounts | map(attribute='id') | join('') | length > 0
  delay: 15
  retries: 20
# Flatten the per-item lookup results into one de-duplicated list of storage
# account ids; these ids are the scopes used for the role assignments.
- name: Get the id of storage accounts created
  set_fact:
    StorageAccountsId: >-
      {{ StorageAccountsInfo.results
      | sum(attribute='storageaccounts', start=[])
      | map(attribute='id')
      | list
      | unique }}
# Adding this module since role assignment fails if it already exists.
# For every storage account id, look up existing assignments of the role
# definition to the user assigned identity's principal.
- name: Get facts about role assignment
  azure_rm_roleassignment_info:
    scope: "{{ item }}"
    assignee: "{{ UserAssignedIdentityInfo.response|map(attribute='properties')|map(attribute='principalId')|list|join('') }}"
    role_definition_id: "{{ RoleDefinitionInfo.response|map(attribute='id')|list|join('') }}"
  register: RoleAssignmentResults
  retries: 20
  delay: 15
  # The previous condition only tested values registered by earlier tasks with
  # "is defined" after join(''), which is always true and cannot change between
  # attempts. Retry on this task's own outcome instead.
  until: RoleAssignmentResults is succeeded
  loop: "{{ StorageAccountsId }}"
# Collect the scopes of the existing role assignments found for ALL storage
# accounts. Setting the fact once from the aggregated results avoids a loop in
# which every iteration overwrites the fact and only the last storage
# account's scopes survive.
- name: Set fact for getting storage accounts that have assigned roles
  set_fact:
    StorageAccountRoles: >-
      {{ RoleAssignmentResults.results
      | sum(attribute='roleassignments', start=[])
      | map(attribute='scope')
      | list
      | unique }}
  no_log: true
# Assign the role to the identity on every storage account that is not already
# listed in StorageAccountRoles. Retried because the role assignment can race
# with completion of the storage account creation.
- name: Create a role assignment
  azure_rm_roleassignment:
    state: present
    scope: "{{ item }}"
    role_definition_id: "{{ RoleDefinitionInfo.response|map(attribute='id')|list|join('') }}"
    assignee_object_id: "{{ UserAssignedIdentityInfo.response|map(attribute='properties')|map(attribute='principalId')|list|join('') }}"
  loop: "{{ StorageAccountsId }}"
  when: item not in StorageAccountRoles
  register: roleassignresult
  until: roleassignresult is succeeded
  delay: 15
  retries: 30
# Create the container named in each volume URI (the text between '://' and
# '@') in the matching storage account. Retried because container creation can
# race with storage account creation; an attempt is accepted when it succeeded
# and either changed nothing or reported a non-empty container.
- name: Create container/Filesystem on ADLS Gen2
  azure_rm_storageblob:
    container: "{{ item.split('@')[0].split('://')[1] }}"
    storage_account_name: "{{ item.split('@')[1].split('.')[0] }}"
    resource_group: "{{ resource_group }}"
  loop: "{{ InstanceVolumes }}"
  register: createfsresult
  until: createfsresult is succeeded and (not createfsresult.changed or createfsresult.container|length > 0)
  delay: 30
  retries: 20
# Record the identity's tenantId (needed for core-site.xml) in muchos.props,
# replacing the existing azure_tenant_id line whether or not it is commented
# out with '#'.
- name: Update tenantId in muchos.props
  lineinfile:
    line: "azure_tenant_id = {{ UserAssignedIdentityInfo.response|map(attribute='properties')|map(attribute='tenantId')|list|join('') }}"
    regexp: '^azure_tenant_id\s*=\s*|^[#]azure_tenant_id\s*=\s*'
    path: "{{ deploy_path }}/conf/muchos.props"
# Record the identity's clientId (needed for core-site.xml) in muchos.props,
# replacing the existing azure_client_id line whether or not it is commented
# out with '#'.
- name: Update clientid in muchos.props
  lineinfile:
    line: "azure_client_id = {{ UserAssignedIdentityInfo.response|map(attribute='properties')|map(attribute='clientId')|list|join('') }}"
    regexp: '^azure_client_id\s*=\s*|^[#]azure_client_id\s*=\s*'
    path: "{{ deploy_path }}/conf/muchos.props"
# Attach the user assigned identity to the scale set. The value of
# userAssignedIdentities is the joined UserAssignedIdentityArr built earlier.
- name: Assign User Assigned Identity to VMSS
  azure_rm_resource:
    api_version: '2019-03-01'
    resource_group: "{{ resource_group }}"
    provider: Compute
    resource_type: virtualMachineScaleSets
    resource_name: "{{ vmss_name }}"
    body:
      identity:
        userAssignedIdentities: "{{ UserAssignedIdentityArr|join('') }}"
        type: UserAssigned
      location: "{{ location }}"