Allow Azure managed disk caching to be configured (#398)

* Change caching default from None to ReadOnly since this is the
  recommended mode for best performance on a Hadoop application workload
* Allow data disk caching option to be configured in muchos.props and
  also for multiple VMSS configurations
diff --git a/ansible/roles/azure/tasks/create_multiple_vmss.yml b/ansible/roles/azure/tasks/create_multiple_vmss.yml
index 1e2a224..151c620 100644
--- a/ansible/roles/azure/tasks/create_multiple_vmss.yml
+++ b/ansible/roles/azure/tasks/create_multiple_vmss.yml
@@ -59,7 +59,7 @@
     data_disks: |
       {%- set data_disks = [] -%}
       {%- for lun in range(item.data_disk_count) -%}
-        {%- set _ = data_disks.append({'lun': lun, 'disk_size_gb': item.data_disk_size_gb, 'managed_disk_type': item.data_disk_sku, 'caching': None }) -%}
+        {%- set _ = data_disks.append({'lun': lun, 'disk_size_gb': item.data_disk_size_gb, 'managed_disk_type': item.data_disk_sku, 'caching': item.data_disk_caching|default('ReadOnly') }) -%}
       {%- endfor -%}
       {{ data_disks }}
   with_items:
diff --git a/ansible/roles/azure/tasks/create_vmss.yml b/ansible/roles/azure/tasks/create_vmss.yml
index eadf05f..7dec7c4 100644
--- a/ansible/roles/azure/tasks/create_vmss.yml
+++ b/ansible/roles/azure/tasks/create_vmss.yml
@@ -31,7 +31,7 @@
 
 - name: Create luns dictionary
   set_fact:
-    luns_dict: "{{ luns_dict | default ([]) + [{ 'lun': item, 'disk_size_gb': disk_size_gb, 'managed_disk_type': data_disk_sku, 'caching': None } ] }}"
+    luns_dict: "{{ luns_dict | default ([]) + [{ 'lun': item, 'disk_size_gb': disk_size_gb, 'managed_disk_type': data_disk_sku, 'caching': data_disk_caching } ] }}"
   with_sequence: start=0 end={{ data_disk_count-1 if data_disk_count > 0 else 0 }}
 
 - name: Set single placement group to correct value
diff --git a/conf/azure_multiple_vmss_vars.yml.example b/conf/azure_multiple_vmss_vars.yml.example
index d66dbec..176c770 100644
--- a/conf/azure_multiple_vmss_vars.yml.example
+++ b/conf/azure_multiple_vmss_vars.yml.example
@@ -7,6 +7,7 @@
     data_disk_count: 4
     data_disk_sku: Premium_LRS
     data_disk_size_gb: 512
+    data_disk_caching: ReadOnly
     capacity: 4
     roles:
       namenode: 1
@@ -22,6 +23,7 @@
     data_disk_count: 4
     data_disk_sku: Standard_LRS
     data_disk_size_gb: 512
+    data_disk_caching: ReadOnly
     capacity: 4
     roles:
       zookeeper: 2
@@ -37,6 +39,7 @@
     data_disk_count: 8
     data_disk_sku: Standard_LRS
     data_disk_size_gb: 1024
+    data_disk_caching: ReadOnly
     capacity: 4
     roles:
       worker: 4
@@ -48,6 +51,7 @@
     data_disk_count: 4
     data_disk_sku: Premium_LRS
     data_disk_size_gb: 512
+    data_disk_caching: ReadOnly
     capacity: 3
     roles:
       namenode: 1
@@ -61,6 +65,7 @@
     data_disk_count: 4
     data_disk_sku: Premium_LRS
     data_disk_size_gb: 512
+    data_disk_caching: ReadOnly
     capacity: 1
     roles:
       metrics: 1
@@ -71,6 +76,7 @@
     data_disk_count: 8
     data_disk_sku: Standard_LRS
     data_disk_size_gb: 1024
+    data_disk_caching: ReadOnly
     capacity: 3
     roles:
       worker: 3
@@ -88,6 +94,9 @@
     data_disk_count: 0
     data_disk_sku: Standard_LRS
     data_disk_size_gb: 1024
+    # This is just an example to show that caching can be set to different
+    # values for each VMSS.
+    data_disk_caching: None
     capacity: 4
     roles:
       worker: 4
diff --git a/conf/muchos.props.example b/conf/muchos.props.example
index c7f60e8..7449807 100644
--- a/conf/muchos.props.example
+++ b/conf/muchos.props.example
@@ -140,6 +140,8 @@
 data_disk_count = 3
 # The size of each managed disk provisioned
 disk_size_gb = 128
+# Indicates the host caching that should be used for data disks. Valid values are ReadOnly, ReadWrite, or None
+data_disk_caching = ReadOnly
 # Location to mount managed disks in each VM
 mount_root = /var/data
 # Location where the metrics data will be written
diff --git a/docs/azure-multiple-vmss.md b/docs/azure-multiple-vmss.md
index 0f8458c..100febf 100644
--- a/docs/azure-multiple-vmss.md
+++ b/docs/azure-multiple-vmss.md
@@ -29,6 +29,7 @@
 | `data_disk_count`| Required | - | An integer value which specifies the number of persistent (managed) data disks to be attached to each VM in the VMSS. It can be 0 in specific cases - see [notes on using ephemeral storage](./azure-ephemeral-disks.md) for details |
 | `data_disk_sku`| Required | - | Can be either Standard_LRS (for HDD) or Premium_LRS (for Premium SSD). At this time, we have not tested the use of Standard SSD or UltraSSD with Muchos |
 | `data_disk_size_gb`| Required | - | An integer value specifying the size of each persistent (managed) data disk in GiB |
+| `data_disk_caching`| Optional | ReadOnly | One of None, ReadOnly, or ReadWrite indicating the type of host caching to use for each persistent (managed) disk |
 | `image_reference`| Optional | If not specified, the corresponding `azure_image_reference` value from the `azure` section in [muchos.props](../conf/muchos.props.example) is used | Azure image reference defined as a pipe-delimited string.
 | `capacity`| Required | - | An integer value specifying the number of VMs in this specific VMSS |
 | `roles`| Required | - | This is a dictionary (list of key-value pairs), each of which should be of the form `muchos_role_name`: `integer count`. See [sample file](../conf/azure_multiple_vmss_vars.yml.example) for examples. the `muchos launch` command for Azure clusters uses this list to assign roles to hosts in a sequential fashion. For example, if a given VMSS has 3 `zkfc` role members and 2 `namenode` role members defined, host0 and host1 in the VMSS will be assigned both `zkfc` and `namenode` roles, and host2 in the VMSS will just be assigned a `zkfc` role |
diff --git a/lib/muchos/config/azure.py b/lib/muchos/config/azure.py
index 5c1d8db..06e131e 100644
--- a/lib/muchos/config/azure.py
+++ b/lib/muchos/config/azure.py
@@ -149,6 +149,12 @@
         return self.getint("azure", "disk_size_gb")
 
     @ansible_host_var
+    @default("ReadOnly")
+    @is_valid(is_in(["ReadOnly", "ReadWrite", "None"]))
+    def data_disk_caching(self):
+        return self.get("azure", "data_disk_caching")
+
+    @ansible_host_var
     @default("/dev/disk/azure/scsi1")
     def azure_disk_device_path(self):
         return self.get("azure", "azure_disk_device_path")