# conf/muchos.props.example - fluo-muchos - Git at Google

 # Licensed to the Apache Software Foundation (ASF) under one or more
 # contributor license agreements.  See the NOTICE file distributed with
 # this work for additional information regarding copyright ownership.
 # The ASF licenses this file to You under the Apache License, Version 2.0
 # (the "License"); you may not use this file except in compliance with
 # the License.  You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.

 [general]
 # Cluster type (azure, ec2, or existing)
 cluster_type = ec2
 # Cluster user name (install command will SSH to cluster using this user)
 # Leave default below if launching cluster in AWS
 cluster_user = fedora
 # Cluster user group; %(cluster_user)s is replaced with the value of cluster_user above
 cluster_group = %(cluster_user)s
 # Cluster user home directory
 user_home = /home/%(cluster_user)s
 # Install directory where Hadoop, Accumulo, etc. will be installed
 install_dir = %(user_home)s/install
 # Hostname of proxy node that Muchos will use to direct installation of cluster.  Will be given
 # public IP if launching in EC2.  If not launching in EC2, node must have public IP that can be reached
 # from your machine. Hostname can be chosen from "nodes" section below.
 proxy_hostname = leader1
 # If set, a SOCKS proxy will be created on the specified port when connecting to proxy using 'muchos ssh <cluster>'
 proxy_socks_port = 38585
 # Accumulo Instance name
 accumulo_instance = muchos
 # Accumulo Password
 # NOTE(review): 'secret' looks like a placeholder in this example file; set your own value
 accumulo_password = secret
 # Software versions (make sure you have a corresponding entry for the checksum in conf/checksums)
 hadoop_version = 3.3.6
 zookeeper_version = 3.8.3
 spark_version = 2.4.8
 fluo_version = 1.2.0
 fluo_yarn_version = 1.0.0
 accumulo_version = 2.1.2
 # Specifies if software should be downloaded. If 'False', tarballs of the software above should be in conf/upload/
 download_software = True
 # Install Hub (for GitHub)
 install_hub = True
 # The Java package to install - by default OpenJDK 11. If you need to use 1.8, please change this.
 java_package=java-11-openjdk-devel
 # The package to use for Java 1.8
 # java_package=java-1.8.0-openjdk-devel
 # Please read the High-Availability section of the README before switching to 'True'
 hdfs_ha = False
 # Give a logical name for the cluster, all one word, no special characters. Required to support HDFS HA.
 nameservice_id = muchoshacluster
 # Number of Accumulo tablet servers to run per host. This is only effective when use_systemd is set to 'True'.
 # Changes to the accumulo-service script are needed in order to support non-systemd cases.
 num_tservers = 1
 # If Accumulo services are to be run under systemd, set this to 'True'
 use_systemd = False
 # ELK stack component versions
 elasticsearch_version = 7.10.2
 kibana_version = 7.10.2
 filebeat_version = 7.10.2
 logstash_version = 7.10.2

 [ec2]
 # AWS machine image to use. The default below is for a Fedora image (in us-east-1).
 # You may need to change this value if a new image has been released or you are running in a different region.
 aws_ami = ami-08b4ee602f76bff79
 # Type of AWS instance launched by default
 default_instance_type = m5d.large
 # Type of AWS instance launched for any node running 'worker' service
 # Leave default below to use same instance type set by 'default_instance_type' property
 worker_instance_type = %(default_instance_type)s
 # Enable template mode by selecting a template from conf/templates, in order to leverage your own
 # custom EC2 launch requests (optional). See conf/templates/README.md for more information
 #cluster_template = example
 # VPC to launch instances in (optional)
 #vpc_id = vpc-xxxxx
 # VPC Subnet to launch instances in (optional)
 #subnet_id = subnet-xxxxxx
 # Security group ID to launch in (optional)
 #security_group_id = sg-xxxxxx
 # Name of the public key that Amazon will load onto your EC2 instances.
 # You can upload and name your public key using the EC2 Management Console.
 # Only the user with this key will be able to SSH to the cluster.
 # Name below should be your 'Key pair name' in EC2 and not the name of your public key file.
 key_name = my_aws_key
 # Type of filesystem to format instance storage as.
 fstype = ext4
 # Force formatting of instance devices, even when they have an existing filesystem.
 force_format = no
 # Tags to add to instances (comma-separated key:value pairs)
 #instance_tags = key1:value1,key2:value2
 # Nodes will be given public IP addresses if true
 associate_public_ip = true
 # Path to file containing user data that will be executed at launch
 #user_data_path = /path/to/user_data
 # Shutdown instances after a delay (in minutes). If 0, no shutdown will occur.
 shutdown_delay_minutes = 0
 # Shutdown behavior of EC2 instances: terminate or stop
 shutdown_behavior = stop

 [azure]
 # Azure subscription ID
 azure_subscription_id =
 # Name of the Azure resource group to use. All resources are created within this
 # resource group. If a resource group with this name already exists, it will be
 # used, otherwise a new resource group with this name will be created and used.
 resource_group = accumulo-rg
 # Name of the Azure Virtual Network (VNET) to use. If a VNET with this name
 # already exists, it will be used; else a new VNET with this name will be created.
 vnet = vnet1
 # The CIDR prefix used to create the virtual network (VNET).
 vnet_cidr = 10.0.0.0/8
 # A single subnet is created within the VNET and given the following name.
 subnet = subnet1
 # The CIDR prefix used for the single subnet within the virtual network.
 subnet_cidr = 10.1.0.0/16
 # Optional. If set to True, multiple VMSS will be created based on conf/azure_multiple_vmss_vars.yml
 use_multiple_vmss = False
 # The "vmss_priority" setting below controls whether Azure Spot is used (or not).
 # When vmss_priority is set to Spot, the cluster uses Azure Spot VM Scale Sets.
 # When vmss_priority is set to None (which is the default), Azure Spot is NOT used.
 # Note that for multiple-VMSS deployments, this setting can be overridden at a
 # per-VMSS level by specifying vmss_priority in the conf/azure_multiple_vmss_vars.yml file.
 vmss_priority = None
 # Azure image reference defined as a pipe-delimited string in the format offer|publisher|sku|version|image_id|
 # Please refer to the 'Launching an Azure cluster' section of the README before making changes
 azure_image_reference = almalinux-x86_64|almalinux|9-gen2|latest||
 # Image payment plan information - values required only if the image requires payment plan info.
 # The format of this configuration line is plan_name|product|publisher|
 azure_image_plan = |||
 # Cloud init file to use when creating VMs with the above image reference.
 # Currently, an Alma Linux 9 specific cloud-init file is used. Different files can be used if needed.
 azure_image_cloud_init_file = cloud-init-alma9.yml
 # Azure image reference defined as a pipe-delimited string in the format offer|publisher|sku|version|image_id|
 # This is the image that will be used for the proxy machine (if specified by azure_proxy_host). If
 # this is not set, then the value of azure_image_reference will be used on the proxy.
 # Please refer to the 'Launching an Azure cluster' section of the README before making changes
 #azure_proxy_image_reference = almalinux-x86_64|almalinux|9-gen2|latest||
 # Proxy image payment plan information - required only if the proxy image requires payment plan info.
 # If this is not set, the value of azure_image_plan will be used on the proxy.
 # The format of this configuration line is plan_name|product|publisher|
 # azure_proxy_image_plan = |||
 # Optional cloud init file to use when creating VMs with the above proxy image reference. If this is not set,
 # the value of azure_image_cloud_init_file will be used.
 # azure_proxy_image_cloud_init_file =
 # Optional OS disk size in GB. If not specified, the size of the OS disk will be as defined in the VM image
 # os_disk_size_gb = 48
 # Size of the cluster to provision.
 # A virtual machine scale set (VMSS) with this many VMs will be created.
 # The minimum allowed size is 3 nodes for a non-HA setup and 4 nodes for an HA setup
 numnodes = 8
 # The size of each virtual machine. See the following link for other sizes:
 # https://learn.microsoft.com/en-us/azure/virtual-machines/linux/sizes-general
 vm_sku = Standard_D8s_v3
 # Each VM will be provisioned with the following type of managed disk
 # The azure_disk_device* parameters below specify the Linux device paths Muchos looks for when selecting disks for storage
 # The default values below are for using Azure managed disks
 azure_disk_device_path = /dev/disk/azure/scsi1
 azure_disk_device_pattern = lun*
 # If using Azure Lsv2 or Lsv3 VMs which have NVME disks for ephemeral storage, use the parameters below instead of the defaults
 # azure_disk_device_path = /dev
 # azure_disk_device_pattern = nvme*n1
 # Type of the data disk attached to the VMSS. 'Standard_LRS' for HDD, 'Premium_LRS' for SSD, 'StandardSSD_LRS' for Standard SSD
 data_disk_sku = Standard_LRS
 # Number of managed disks provisioned on each VM
 data_disk_count = 3
 # The size (in GB) of each managed disk provisioned
 disk_size_gb = 128
 # Indicates the host caching that should be used for data disks. Valid values are ReadOnly, ReadWrite, or None
 data_disk_caching = ReadOnly
 # Location to mount managed disks in each VM
 mount_root = /var/data
 # Location where the metrics data will be written
 metrics_drive_root = var-data
 # Optional proxy VM. If not set, the first node of the cluster will be selected as the proxy.
 azure_proxy_host =
 # Azure VM SKU to use when creating the proxy host - defaults to an 8-vCore general-purpose VM
 azure_proxy_host_vm_sku = Standard_D8s_v3
 # The Azure datacenter location to use for creating Muchos resources
 location = westus2
 # Enable ADLS Gen2 storage configuration. The Muchos parameters instance_volumes_input, instance_volumes_adls &
 # adls_storage_type are not required if use_adlsg2 is False.
 use_adlsg2 = False
 # Storage accounts can be auto-generated or manually specified. "|" is used as the separator between manual and
 # auto-generated storage account names and must always be specified.
 # Manual and auto-generated names are mutually exclusive
 #
 # Specifying storage accounts manually:
 #    |abfss://<container-name>@<storage-account-name>.<domain-name>/<folder-name>. Use comma to specify multiple entries
 #    Example:|abfss://accumulodata@shnawastore1.dfs.core.windows.net/accumulo,abfss://accumulodata@shnawastore2.dfs.core.windows.net/accumulo
 #
 # Specifying auto-generated storage accounts:
 #   <Number-of-Storage-Accounts>,<domain-name>|
 #   Example: 3,dfs.core.windows.net|
 instance_volumes_input = 1,dfs.core.windows.net|
 # Do not update "instance_volumes_adls", it will be populated dynamically during launch phase of muchos
 instance_volumes_adls =
 # Type of storage for ADLS Gen2 storage accounts
 adls_storage_type = Standard_LRS
 # Specify user assigned identity name. "{{ vmss_name }}-ua-msi"  will be created if value is not provided
 user_assigned_identity =
 # Do not update "azure_tenant_id", it will be populated dynamically during launch phase of muchos
 azure_tenant_id =
 # Do not update "azure_client_id", it will be populated dynamically during launch phase of muchos
 azure_client_id =
 # Do not update "principal_id", it will be populated dynamically during launch phase of muchos when "use_hdfs = False"
 # NOTE(review): no "use_hdfs" key appears in this file; confirm whether "use_adlsg2" is the setting meant here
 principal_id =
 # Optional Azure fileshare to mount on all nodes.
 # Path and credentials must be updated to enable this.
 #azure_fileshare_mount = /mnt/azure-fileshare
 #azure_fileshare = //fileshare-to-mount.file.core.windows.net/path
 #azure_fileshare_username = fs_username
 #azure_fileshare_password = fs_password
 # Optional integration with Azure Log Analytics
 # A workspace will be created and az_logs_resource_id, az_logs_id, az_logs_key
 # will be updated here by the launch command. If you wish to use an existing
 # workspace in another resource group, then fill in all 3 of these values before
 # running launch.
 # For details on how to get a workspace ID and key, see the link below
 # https://docs.microsoft.com/en-us/azure/azure-monitor/learn/quick-collect-linux-computer#obtain-workspace-id-and-key
 az_oms_integration_needed = False
 #az_logs_resource_id =
 #az_logs_id =
 #az_logs_key =
 # Set az_use_app_insights to True to enable Azure Application Insights integration
 # An Application Insights instance will be created by the launch command and its
 # connection string will be updated here. If you wish to use a different
 # Application Insights instance, then set az_appinsights_connection_string to
 # its connection string before invoking launch.
 az_use_app_insights = False
 # NOTE(review): presumably the version of the Application Insights agent to install - confirm
 az_app_insights_version = 3.2.1
 #az_appinsights_connection_string =

 [existing]
 # Settings used when cluster_type = existing
 # Root of data dirs
 mount_root = /var/data
 # Comma-separated list of data directories present on all nodes
 data_dirs = /var/data1,/var/data2,/var/data3
 # Comma-separated list of identifiers for the drives reported in metrics
 metrics_drive_ids = var-data1,var-data2,var-data3

 [performance]
 # Selects the performance profile used to tune Accumulo, Yarn, and Fluo settings.
 # The value names one of the profile sections in this file; pick an existing one or
 # create your own. Keep total memory below what each node has and leave room for the OS.
 profile = perf-small

 # Below are different performance profiles that can be selected.  Each profile
 # has the same properties with different values.

 [perf-small]
 # JVM heap size for each Accumulo tablet server
 accumulo_tserv_mem = 2G
 # Data cache size per tserver (only applies when using Accumulo 1.x)
 accumulo_dcache_size = 768M
 # Index cache size per tserver (only applies when using Accumulo 1.x)
 accumulo_icache_size = 256M
 # In-memory map size per tserver (only applies when using Accumulo 1.x)
 accumulo_imap_size = 512M
 # JVM heap for each Fluo worker, in MB
 fluo_worker_mem_mb = 2048
 # Gap, in MB, between the Yarn memory limit and the java -Xmx setting.
 # E.g. with fluo_worker_mem_mb set to 2048 and twill_reserve_mem_mb set to 256,
 # workers run with -Xmx set to 2048-256. Consider increasing this value if Yarn
 # is killing worker processes because they use too much memory.
 twill_reserve_mem_mb = 256
 # Number of threads for each Fluo worker
 fluo_worker_threads = 20
 # Number of workers to run per node
 fluo_worker_instances_multiplier = 1
 # Maximum amount of memory for YARN per node, in MB
 yarn_nm_mem_mb = 4096

 [perf-medium]
 # Same properties as the other profile sections, with mid-sized values
 accumulo_tserv_mem = 3G
 # The three Accumulo cache/map sizes below only apply when using Accumulo 1.x
 accumulo_dcache_size = 1536M
 accumulo_icache_size = 512M
 accumulo_imap_size = 512M
 fluo_worker_mem_mb = 4096
 twill_reserve_mem_mb = 512
 fluo_worker_threads = 64
 fluo_worker_instances_multiplier = 1
 yarn_nm_mem_mb = 8192

 [perf-large]
 # Same properties as the other profile sections, with the largest values
 accumulo_tserv_mem = 4G
 # The three Accumulo cache/map sizes below only apply when using Accumulo 1.x
 accumulo_dcache_size = 2G
 accumulo_icache_size = 1G
 accumulo_imap_size = 512M
 fluo_worker_mem_mb = 4096
 twill_reserve_mem_mb = 512
 fluo_worker_threads = 64
 fluo_worker_instances_multiplier = 2
 yarn_nm_mem_mb = 16384

 [azd16s]
 # Performance profile; select it with "profile = azd16s" in the [performance] section.
 # NOTE(review): the name suggests it targets Azure D16s-class VMs - confirm
 accumulo_tserv_mem = 4G
 accumulo_dcache_size = 2G
 accumulo_icache_size = 1G
 accumulo_imap_size = 512M
 fluo_worker_mem_mb = 4096
 twill_reserve_mem_mb = 512
 fluo_worker_threads = 64
 fluo_worker_instances_multiplier = 2
 yarn_nm_mem_mb = 16384

 [azd8s]
 # Performance profile; select it with "profile = azd8s" in the [performance] section.
 # NOTE(review): the name suggests it targets Azure D8s-class VMs - confirm
 accumulo_tserv_mem = 4G
 accumulo_dcache_size = 2G
 accumulo_icache_size = 1G
 accumulo_imap_size = 512M
 fluo_worker_mem_mb = 4096
 twill_reserve_mem_mb = 512
 fluo_worker_threads = 64
 fluo_worker_instances_multiplier = 2
 yarn_nm_mem_mb = 16384

 [ansible-vars]
 # This section is used to override Ansible variables. Any variable set below will be placed in the hosts file created by Muchos.
 # Expected format, one variable per line:  variable = value

 [nodes]
 # If cluster_type=existing, the list of nodes below needs to be manually populated with a list of cluster nodes in the following format:
 # <Hostname> = <Service1>[,<Service2>,<Service3>]
 # Where:
 #   Hostname = Must be unique.  Will be used for hostname in EC2 or should match hostname on your own cluster
 #   Service = Service to run on node (possible values: zookeeper, namenode, resourcemanager, accumulomaster, client, swarmmanager,
 #             mesosmaster, worker, fluo, metrics, spark, elkserver). The following services are required: namenode, resourcemanager,
 #             accumulomaster, zookeeper & worker
 # If cluster_type=azure, the list of nodes below is auto-generated by the launch action e.g. "muchos launch --cluster accumuloclstr"
 # For the 'azure' cluster type, it is perfectly normal if the auto-generated list of node names is not sequential
 # For viewing logging and statistics with Kibana, the elkserver service must be added to a leader node.
 leader1 = namenode,resourcemanager,accumulomaster,zookeeper
 leader2 = metrics
 worker1 = worker,swarmmanager
 worker2 = worker
 worker3 = worker
 worker4 = worker
	# Licensed to the Apache Software Foundation (ASF) under one or more
	# contributor license agreements. See the NOTICE file distributed with
	# this work for additional information regarding copyright ownership.
	# The ASF licenses this file to You under the Apache License, Version 2.0
	# (the "License"); you may not use this file except in compliance with
	# the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	[general]
	# NOTE(review): a [general] section with the same keys already appears earlier in this file;
	# strict INI parsers reject duplicate sections - confirm which copy is canonical.
	# Cluster type (azure, ec2, or existing)
	cluster_type = ec2
	# Cluster user name (install command will SSH to cluster using this user)
	# Leave default below if launching cluster in AWS
	cluster_user = fedora
	# Cluster user group; %(cluster_user)s is replaced with the value of cluster_user above
	cluster_group = %(cluster_user)s
	# Cluster user home directory
	user_home = /home/%(cluster_user)s
	# Install directory where Hadoop, Accumulo, etc. will be installed
	install_dir = %(user_home)s/install
	# Hostname of proxy node that Muchos will use to direct installation of cluster. Will be given
	# public IP if launching in EC2. If not launching in EC2, node must have public IP that can be reached
	# from your machine. Hostname can be chosen from "nodes" section below.
	proxy_hostname = leader1
	# If set, a SOCKS proxy will be created on the specified port when connecting to proxy using 'muchos ssh <cluster>'
	proxy_socks_port = 38585
	# Accumulo Instance name
	accumulo_instance = muchos
	# Accumulo Password
	# NOTE(review): 'secret' looks like a placeholder in this example file; set your own value
	accumulo_password = secret
	# Software versions (make sure you have a corresponding entry for the checksum in conf/checksums)
	hadoop_version = 3.3.6
	zookeeper_version = 3.8.3
	spark_version = 2.4.8
	fluo_version = 1.2.0
	fluo_yarn_version = 1.0.0
	accumulo_version = 2.1.2
	# Specifies if software should be downloaded. If 'False', tarballs of the software above should be in conf/upload/
	download_software = True
	# Install Hub (for GitHub)
	install_hub = True
	# The Java package to install - by default OpenJDK 11. If you need to use 1.8, please change this.
	java_package=java-11-openjdk-devel
	# The package to use for Java 1.8
	# java_package=java-1.8.0-openjdk-devel
	# Please read the High-Availability section of the README before switching to 'True'
	hdfs_ha = False
	# Give a logical name for the cluster, all one word, no special characters. Required to support HDFS HA.
	nameservice_id = muchoshacluster
	# Number of Accumulo tablet servers to run per host. This is only effective when use_systemd is set to 'True'.
	# Changes to the accumulo-service script are needed in order to support non-systemd cases.
	num_tservers = 1
	# If Accumulo services are to be run under systemd, set this to 'True'
	use_systemd = False
	# ELK stack component versions
	elasticsearch_version = 7.10.2
	kibana_version = 7.10.2
	filebeat_version = 7.10.2
	logstash_version = 7.10.2

	[ec2]
	# AWS machine image to use. The default below is for a Fedora image (in us-east-1).
	# You may need to change this value if a new image has been released or you are running in a different region.
	aws_ami = ami-08b4ee602f76bff79
	# Type of AWS instance launched by default
	default_instance_type = m5d.large
	# Type of AWS instance launched for any node running 'worker' service
	# Leave default below to use same instance type set by 'default_instance_type' property
	worker_instance_type = %(default_instance_type)s
	# Enable template mode by selecting a template from conf/templates, in order to leverage your own
	# custom EC2 launch requests (optional). See conf/templates/README.md for more information
	#cluster_template = example
	# VPC to launch instances in (optional)
	#vpc_id = vpc-xxxxx
	# VPC Subnet to launch instances in (optional)
	#subnet_id = subnet-xxxxxx
	# Security group ID to launch in (optional)
	#security_group_id = sg-xxxxxx
	# Name of the public key that Amazon will load onto your EC2 instances.
	# You can upload and name your public key using the EC2 Management Console.
	# Only the user with this key will be able to SSH to the cluster.
	# Name below should be your 'Key pair name' in EC2 and not the name of your public key file.
	key_name = my_aws_key
	# Type of filesystem to format instance storage as.
	fstype = ext4
	# Force formatting of instance devices, even when they have an existing filesystem.
	force_format = no
	# Tags to add to instances (comma-separated key:value pairs)
	#instance_tags = key1:value1,key2:value2
	# Nodes will be given public IP addresses if true
	associate_public_ip = true
	# Path to file containing user data that will be executed at launch
	#user_data_path = /path/to/user_data
	# Shutdown instances after a delay (in minutes). If 0, no shutdown will occur.
	shutdown_delay_minutes = 0
	# Shutdown behavior of EC2 instances: terminate or stop
	shutdown_behavior = stop

	[azure]
	# Azure subscription ID
	azure_subscription_id =
	# Name of the Azure resource group to use. All resources are created within this
	# resource group. If a resource group with this name already exists, it will be
	# used, otherwise a new resource group with this name will be created and used.
	resource_group = accumulo-rg
	# Name of the Azure Virtual Network (VNET) to use. If a VNET with this name
	# already exists, it will be used; else a new VNET with this name will be created.
	vnet = vnet1
	# The CIDR prefix used to create the virtual network (VNET).
	vnet_cidr = 10.0.0.0/8
	# A single subnet is created within the VNET and given the following name.
	subnet = subnet1
	# The CIDR prefix used for the single subnet within the virtual network.
	subnet_cidr = 10.1.0.0/16
	# Optional. If set to True, multiple VMSS will be created based on conf/azure_multiple_vmss_vars.yml
	use_multiple_vmss = False
	# The "vmss_priority" setting below controls whether Azure Spot is used (or not).
	# When vmss_priority is set to Spot, the cluster uses Azure Spot VM Scale Sets.
	# When vmss_priority is set to None (which is the default), Azure Spot is NOT used.
	# Note that for multiple-VMSS deployments, this setting can be overridden at a
	# per-VMSS level by specifying vmss_priority in the conf/azure_multiple_vmss_vars.yml file.
	vmss_priority = None
	# Azure image reference defined as a pipe-delimited string in the format offer|publisher|sku|version|image_id|
	# The fields are separated by a bare "|"; do not backslash-escape it.
	# Please refer to the 'Launching an Azure cluster' section of the README before making changes
	azure_image_reference = almalinux-x86_64|almalinux|9-gen2|latest||
	# Image payment plan information - values required only if the image requires payment plan info.
	# The format of this configuration line is plan_name|product|publisher|
	azure_image_plan = |||
	# Cloud init file to use when creating VMs with the above image reference.
	# Currently, an Alma Linux 9 specific cloud-init file is used. Different files can be used if needed.
	azure_image_cloud_init_file = cloud-init-alma9.yml
	# Azure image reference defined as a pipe-delimited string in the format offer|publisher|sku|version|image_id|
	# This is the image that will be used for the proxy machine (if specified by azure_proxy_host). If
	# this is not set, then the value of azure_image_reference will be used on the proxy.
	# Please refer to the 'Launching an Azure cluster' section of the README before making changes
	#azure_proxy_image_reference = almalinux-x86_64|almalinux|9-gen2|latest||
	# Proxy image payment plan information - required only if the proxy image requires payment plan info.
	# If this is not set, the value of azure_image_plan will be used on the proxy.
	# The format of this configuration line is plan_name|product|publisher|
	# azure_proxy_image_plan = |||
	# Optional cloud init file to use when creating VMs with the above proxy image reference. If this is not set,
	# the value of azure_image_cloud_init_file will be used.
	# azure_proxy_image_cloud_init_file =
	# Optional OS disk size in GB. If not specified, the size of the OS disk will be as defined in the VM image
	# os_disk_size_gb = 48
	# Size of the cluster to provision.
	# A virtual machine scale set (VMSS) with this many VMs will be created.
	# The minimum allowed size is 3 nodes for a non-HA setup and 4 nodes for an HA setup
	numnodes = 8
	# The size of each virtual machine. See the following link for other sizes:
	# https://learn.microsoft.com/en-us/azure/virtual-machines/linux/sizes-general
	vm_sku = Standard_D8s_v3
	# Each VM will be provisioned with the following type of managed disk
	# The azure_disk_device* parameters below specify the Linux device paths Muchos looks for when selecting disks for storage
	# The default values below are for using Azure managed disks
	azure_disk_device_path = /dev/disk/azure/scsi1
	azure_disk_device_pattern = lun*
	# If using Azure Lsv2 or Lsv3 VMs which have NVME disks for ephemeral storage, use the parameters below instead of the defaults
	# azure_disk_device_path = /dev
	# azure_disk_device_pattern = nvme*n1
	# Type of the data disk attached to the VMSS. 'Standard_LRS' for HDD, 'Premium_LRS' for SSD, 'StandardSSD_LRS' for Standard SSD
	data_disk_sku = Standard_LRS
	# Number of managed disks provisioned on each VM
	data_disk_count = 3
	# The size (in GB) of each managed disk provisioned
	disk_size_gb = 128
	# Indicates the host caching that should be used for data disks. Valid values are ReadOnly, ReadWrite, or None
	data_disk_caching = ReadOnly
	# Location to mount managed disks in each VM
	mount_root = /var/data
	# Location where the metrics data will be written
	metrics_drive_root = var-data
	# Optional proxy VM. If not set, the first node of the cluster will be selected as the proxy.
	azure_proxy_host =
	# Azure VM SKU to use when creating the proxy host - defaults to an 8-vCore general-purpose VM
	azure_proxy_host_vm_sku = Standard_D8s_v3
	# The Azure datacenter location to use for creating Muchos resources
	location = westus2
	# Enable ADLS Gen2 storage configuration. The Muchos parameters instance_volumes_input, instance_volumes_adls &
	# adls_storage_type are not required if use_adlsg2 is False.
	use_adlsg2 = False
	# Storage accounts can be auto-generated or manually specified. "|" is used as the separator between manual and
	# auto-generated storage account names and must always be specified.
	# Manual and auto-generated names are mutually exclusive
	#
	# Specifying storage accounts manually:
	#    |abfss://<container-name>@<storage-account-name>.<domain-name>/<folder-name>. Use comma to specify multiple entries
	#    Example:|abfss://accumulodata@shnawastore1.dfs.core.windows.net/accumulo,abfss://accumulodata@shnawastore2.dfs.core.windows.net/accumulo
	#
	# Specifying auto-generated storage accounts:
	#   <Number-of-Storage-Accounts>,<domain-name>|
	#   Example: 3,dfs.core.windows.net|
	instance_volumes_input = 1,dfs.core.windows.net|
	# Do not update "instance_volumes_adls", it will be populated dynamically during launch phase of muchos
	instance_volumes_adls =
	# Type of storage for ADLS Gen2 storage accounts
	adls_storage_type = Standard_LRS
	# Specify user assigned identity name. "{{ vmss_name }}-ua-msi" will be created if value is not provided
	user_assigned_identity =
	# Do not update "azure_tenant_id", it will be populated dynamically during launch phase of muchos
	azure_tenant_id =
	# Do not update "azure_client_id", it will be populated dynamically during launch phase of muchos
	azure_client_id =
	# Do not update "principal_id", it will be populated dynamically during launch phase of muchos when "use_hdfs = False"
	# NOTE(review): no "use_hdfs" key appears in this file; confirm whether "use_adlsg2" is the setting meant here
	principal_id =
	# Optional Azure fileshare to mount on all nodes.
	# Path and credentials must be updated to enable this.
	#azure_fileshare_mount = /mnt/azure-fileshare
	#azure_fileshare = //fileshare-to-mount.file.core.windows.net/path
	#azure_fileshare_username = fs_username
	#azure_fileshare_password = fs_password
	# Optional integration with Azure Log Analytics
	# A workspace will be created and az_logs_resource_id, az_logs_id, az_logs_key
	# will be updated here by the launch command. If you wish to use an existing
	# workspace in another resource group, then fill in all 3 of these values before
	# running launch.
	# For details on how to get a workspace ID and key, see the link below
	# https://docs.microsoft.com/en-us/azure/azure-monitor/learn/quick-collect-linux-computer#obtain-workspace-id-and-key
	az_oms_integration_needed = False
	#az_logs_resource_id =
	#az_logs_id =
	#az_logs_key =
	# Set az_use_app_insights to True to enable Azure Application Insights integration
	# An Application Insights instance will be created by the launch command and its
	# connection string will be updated here. If you wish to use a different
	# Application Insights instance, then set az_appinsights_connection_string to
	# its connection string before invoking launch.
	az_use_app_insights = False
	# NOTE(review): presumably the version of the Application Insights agent to install - confirm
	az_app_insights_version = 3.2.1
	#az_appinsights_connection_string =

	[existing]
	# Settings used when cluster_type = existing
	# Root of data dirs
	mount_root = /var/data
	# Comma-separated list of data directories present on all nodes
	data_dirs = /var/data1,/var/data2,/var/data3
	# Comma-separated list of identifiers for the drives reported in metrics
	metrics_drive_ids = var-data1,var-data2,var-data3

	[performance]
	# Selects the performance profile used to tune Accumulo, Yarn, and Fluo settings.
	# The value names one of the profile sections in this file; pick an existing one or
	# create your own. Keep total memory below what each node has and leave room for the OS.
	profile = perf-small

	# Below are different performance profiles that can be selected. Each profile
	# has the same properties with different values.

	[perf-small]
	# JVM heap size for each Accumulo tablet server
	accumulo_tserv_mem = 2G
	# Data cache size per tserver (only applies when using Accumulo 1.x)
	accumulo_dcache_size = 768M
	# Index cache size per tserver (only applies when using Accumulo 1.x)
	accumulo_icache_size = 256M
	# In-memory map size per tserver (only applies when using Accumulo 1.x)
	accumulo_imap_size = 512M
	# JVM heap for each Fluo worker, in MB
	fluo_worker_mem_mb = 2048
	# Gap, in MB, between the Yarn memory limit and the java -Xmx setting.
	# E.g. with fluo_worker_mem_mb set to 2048 and twill_reserve_mem_mb set to 256,
	# workers run with -Xmx set to 2048-256. Consider increasing this value if Yarn
	# is killing worker processes because they use too much memory.
	twill_reserve_mem_mb = 256
	# Number of threads for each Fluo worker
	fluo_worker_threads = 20
	# Number of workers to run per node
	fluo_worker_instances_multiplier = 1
	# Maximum amount of memory for YARN per node, in MB
	yarn_nm_mem_mb = 4096

	[perf-medium]
	# Same properties as the other profile sections, with mid-sized values
	accumulo_tserv_mem = 3G
	# The three Accumulo cache/map sizes below only apply when using Accumulo 1.x
	accumulo_dcache_size = 1536M
	accumulo_icache_size = 512M
	accumulo_imap_size = 512M
	fluo_worker_mem_mb = 4096
	twill_reserve_mem_mb = 512
	fluo_worker_threads = 64
	fluo_worker_instances_multiplier = 1
	yarn_nm_mem_mb = 8192

	[perf-large]
	# Same properties as the other profile sections, with the largest values
	accumulo_tserv_mem = 4G
	# The three Accumulo cache/map sizes below only apply when using Accumulo 1.x
	accumulo_dcache_size = 2G
	accumulo_icache_size = 1G
	accumulo_imap_size = 512M
	fluo_worker_mem_mb = 4096
	twill_reserve_mem_mb = 512
	fluo_worker_threads = 64
	fluo_worker_instances_multiplier = 2
	yarn_nm_mem_mb = 16384

	[azd16s]
	# Performance profile; select it with "profile = azd16s" in the [performance] section.
	# NOTE(review): the name suggests it targets Azure D16s-class VMs - confirm
	accumulo_tserv_mem = 4G
	accumulo_dcache_size = 2G
	accumulo_icache_size = 1G
	accumulo_imap_size = 512M
	fluo_worker_mem_mb = 4096
	twill_reserve_mem_mb = 512
	fluo_worker_threads = 64
	fluo_worker_instances_multiplier = 2
	yarn_nm_mem_mb = 16384

	[azd8s]
	# Performance profile; select it with "profile = azd8s" in the [performance] section.
	# NOTE(review): the name suggests it targets Azure D8s-class VMs - confirm
	accumulo_tserv_mem = 4G
	accumulo_dcache_size = 2G
	accumulo_icache_size = 1G
	accumulo_imap_size = 512M
	fluo_worker_mem_mb = 4096
	twill_reserve_mem_mb = 512
	fluo_worker_threads = 64
	fluo_worker_instances_multiplier = 2
	yarn_nm_mem_mb = 16384

	[ansible-vars]
	# This section is used to override Ansible variables. Any variable set below will be placed in the hosts file created by Muchos.
	# Expected format, one variable per line: variable = value

	[nodes]
	# If cluster_type=existing, the list of nodes below needs to be manually populated with a list of cluster nodes in the following format:
	# <Hostname> = <Service1>[,<Service2>,<Service3>]
	# Where:
	#   Hostname = Must be unique. Will be used for hostname in EC2 or should match hostname on your own cluster
	#   Service = Service to run on node (possible values: zookeeper, namenode, resourcemanager, accumulomaster, client, swarmmanager,
	#             mesosmaster, worker, fluo, metrics, spark, elkserver). The following services are required: namenode, resourcemanager,
	#             accumulomaster, zookeeper & worker
	# If cluster_type=azure, the list of nodes below is auto-generated by the launch action e.g. "muchos launch --cluster accumuloclstr"
	# For the 'azure' cluster type, it is perfectly normal if the auto-generated list of node names is not sequential
	# For viewing logging and statistics with Kibana, the elkserver service must be added to a leader node.
	leader1 = namenode,resourcemanager,accumulomaster,zookeeper
	leader2 = metrics
	worker1 = worker,swarmmanager
	worker2 = worker
	worker3 = worker
	worker4 = worker