Allow Azure managed disk caching to be configured (#398)
* Change caching default from None to ReadOnly since this is the
recommended mode for best performance on a Hadoop application workload
* Allow data disk caching option to be configured in muchos.props and
also for multiple VMSS configurations
diff --git a/ansible/roles/azure/tasks/create_multiple_vmss.yml b/ansible/roles/azure/tasks/create_multiple_vmss.yml
index 1e2a224..151c620 100644
--- a/ansible/roles/azure/tasks/create_multiple_vmss.yml
+++ b/ansible/roles/azure/tasks/create_multiple_vmss.yml
@@ -59,7 +59,7 @@
data_disks: |
{%- set data_disks = [] -%}
{%- for lun in range(item.data_disk_count) -%}
- {%- set _ = data_disks.append({'lun': lun, 'disk_size_gb': item.data_disk_size_gb, 'managed_disk_type': item.data_disk_sku, 'caching': None }) -%}
+ {%- set _ = data_disks.append({'lun': lun, 'disk_size_gb': item.data_disk_size_gb, 'managed_disk_type': item.data_disk_sku, 'caching': item.data_disk_caching|default('ReadOnly') }) -%}
{%- endfor -%}
{{ data_disks }}
with_items:
diff --git a/ansible/roles/azure/tasks/create_vmss.yml b/ansible/roles/azure/tasks/create_vmss.yml
index eadf05f..7dec7c4 100644
--- a/ansible/roles/azure/tasks/create_vmss.yml
+++ b/ansible/roles/azure/tasks/create_vmss.yml
@@ -31,7 +31,7 @@
- name: Create luns dictionary
set_fact:
- luns_dict: "{{ luns_dict | default ([]) + [{ 'lun': item, 'disk_size_gb': disk_size_gb, 'managed_disk_type': data_disk_sku, 'caching': None } ] }}"
+ luns_dict: "{{ luns_dict | default ([]) + [{ 'lun': item, 'disk_size_gb': disk_size_gb, 'managed_disk_type': data_disk_sku, 'caching': data_disk_caching } ] }}"
with_sequence: start=0 end={{ data_disk_count-1 if data_disk_count > 0 else 0 }}
- name: Set single placement group to correct value
diff --git a/conf/azure_multiple_vmss_vars.yml.example b/conf/azure_multiple_vmss_vars.yml.example
index d66dbec..176c770 100644
--- a/conf/azure_multiple_vmss_vars.yml.example
+++ b/conf/azure_multiple_vmss_vars.yml.example
@@ -7,6 +7,7 @@
data_disk_count: 4
data_disk_sku: Premium_LRS
data_disk_size_gb: 512
+ data_disk_caching: ReadOnly
capacity: 4
roles:
namenode: 1
@@ -22,6 +23,7 @@
data_disk_count: 4
data_disk_sku: Standard_LRS
data_disk_size_gb: 512
+ data_disk_caching: ReadOnly
capacity: 4
roles:
zookeeper: 2
@@ -37,6 +39,7 @@
data_disk_count: 8
data_disk_sku: Standard_LRS
data_disk_size_gb: 1024
+ data_disk_caching: ReadOnly
capacity: 4
roles:
worker: 4
@@ -48,6 +51,7 @@
data_disk_count: 4
data_disk_sku: Premium_LRS
data_disk_size_gb: 512
+ data_disk_caching: ReadOnly
capacity: 3
roles:
namenode: 1
@@ -61,6 +65,7 @@
data_disk_count: 4
data_disk_sku: Premium_LRS
data_disk_size_gb: 512
+ data_disk_caching: ReadOnly
capacity: 1
roles:
metrics: 1
@@ -71,6 +76,7 @@
data_disk_count: 8
data_disk_sku: Standard_LRS
data_disk_size_gb: 1024
+ data_disk_caching: ReadOnly
capacity: 3
roles:
worker: 3
@@ -88,6 +94,9 @@
data_disk_count: 0
data_disk_sku: Standard_LRS
data_disk_size_gb: 1024
+ # This is just an example to show that caching can be set to different
+ # values for each VMSS.
+ data_disk_caching: None
capacity: 4
roles:
worker: 4
diff --git a/conf/muchos.props.example b/conf/muchos.props.example
index c7f60e8..7449807 100644
--- a/conf/muchos.props.example
+++ b/conf/muchos.props.example
@@ -140,6 +140,8 @@
data_disk_count = 3
# The size of each managed disk provisioned
disk_size_gb = 128
+# Indicates the host caching that should be used for data disks. Valid values are ReadOnly, ReadWrite, or None
+data_disk_caching = ReadOnly
# Location to mount managed disks in each VM
mount_root = /var/data
# Location where the metrics data will be written
diff --git a/docs/azure-multiple-vmss.md b/docs/azure-multiple-vmss.md
index 0f8458c..100febf 100644
--- a/docs/azure-multiple-vmss.md
+++ b/docs/azure-multiple-vmss.md
@@ -29,6 +29,7 @@
| `data_disk_count`| Required | - | An integer value which specifies the number of persistent (managed) data disks to be attached to each VM in the VMSS. It can be 0 in specific cases - see [notes on using ephemeral storage](./azure-ephemeral-disks.md) for details |
| `data_disk_sku`| Required | - | Can be either Standard_LRS (for HDD) or Premium_LRS (for Premium SSD). At this time, we have not tested the use of Standard SSD or UltraSSD with Muchos |
| `data_disk_size_gb`| Required | - | An integer value specifying the size of each persistent (managed) data disk in GiB |
+| `data_disk_caching`| Optional | ReadOnly | One of None, ReadOnly, or ReadWrite indicating the type of host caching to use for each persistent (managed) disk |
| `image_reference`| Optional | If not specified, the corresponding `azure_image_reference` value from the `azure` section in [muchos.props](../conf/muchos.props.example) is used | Azure image reference defined as a pipe-delimited string.
| `capacity`| Required | - | An integer value specifying the number of VMs in this specific VMSS |
| `roles`| Required | - | This is a dictionary (list of key-value pairs), each of which should be of the form `muchos_role_name`: `integer count`. See [sample file](../conf/azure_multiple_vmss_vars.yml.example) for examples. the `muchos launch` command for Azure clusters uses this list to assign roles to hosts in a sequential fashion. For example, if a given VMSS has 3 `zkfc` role members and 2 `namenode` role members defined, host0 and host1 in the VMSS will be assigned both `zkfc` and `namenode` roles, and host2 in the VMSS will just be assigned a `zkfc` role |
diff --git a/lib/muchos/config/azure.py b/lib/muchos/config/azure.py
index 5c1d8db..06e131e 100644
--- a/lib/muchos/config/azure.py
+++ b/lib/muchos/config/azure.py
@@ -149,6 +149,12 @@
return self.getint("azure", "disk_size_gb")
@ansible_host_var
+ @default("ReadOnly")
+ @is_valid(is_in(["ReadOnly", "ReadWrite", "None"]))
+ def data_disk_caching(self):
+ return self.getint("azure", "data_disk_caching")
+
+ @ansible_host_var
@default("/dev/disk/azure/scsi1")
def azure_disk_device_path(self):
return self.get("azure", "azure_disk_device_path")