blob: 56a427fade0da8d2d435da36ade1bb8cf975a1d5 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Cluster type (azure, ec2, or existing)
cluster_type = ec2
# Cluster user name (install command will SSH to cluster using this user)
# Leave default below if launching cluster in AWS
cluster_user = centos
# Cluster user group
cluster_group = %(cluster_user)s
# Cluster user home directory
user_home = /home/%(cluster_user)s
# Install directory where Hadoop, Accumulo, etc will be installed
install_dir = %(user_home)s/install
# Hostname of proxy node that Muchos will use to direct installation of cluster. Will be given
# public IP if launching in EC2. If not launching in EC2, node must have public IP that can be reached
# from your machine. Hostname can be chosen from "nodes" section below.
proxy_hostname = leader1
# If set, a SOCKS proxy will be created on the specified port when connecting to proxy using 'muchos ssh <cluster>'
#proxy_socks_port = 38585
# Accumulo Instance name
accumulo_instance = muchos
# Accumulo Password
accumulo_password = secret
# Software versions (make sure you have a corresponding entry for the checksum in conf/checksums)
hadoop_version = 3.2.2
zookeeper_version = 3.6.3
spark_version = 2.4.8
fluo_version = 1.2.0
fluo_yarn_version = 1.0.0
accumulo_version = 2.0.1
# Specifies if software should be downloaded. If 'False', tarballs of the software above should be in conf/upload/
download_software = True
# Install Hub (for GitHub)
install_hub = True
# The java package to install
# The package to use for java 11
# java_package=java-11-openjdk-devel
# Please read the High-Availability section of the README before switching to 'True'
hdfs_ha = False
# Give a logical name for the cluster, all one word no special characters. Required to support HDFS HA.
nameservice_id = muchoshacluster
# Number of Accumulo tablet servers to run per host. This is only effective when use_systemd is set to 'True'.
# Supporting non-systemd cases would require changes to the accumulo-service script.
num_tservers = 1
# If accumulo services are to be run under systemd, set this to 'True'
use_systemd = False
# AWS machine image to use. The default below is for a CentOS 7 image (in us-east-1).
# You may need to change this value if a new image has been released or you are running in a different region.
# Before using this AMI, subscribe to it on the CentOS product page below or launching will fail:
aws_ami = ami-0affd4508a5d2481b
# Type of AWS instance launched by default
default_instance_type = m5d.large
# Type of AWS instance launched for any node running 'worker' service
# Leave default below to use same instance type set by 'default_instance_type' property
worker_instance_type = %(default_instance_type)s
# Enable template mode by selecting a template from conf/templates, in order to leverage your own
# custom EC2 launch requests (optional). See conf/templates/ for more information
#cluster_template = example
# VPC to launch instances in (optional)
#vpc_id = vpc-xxxxx
# VPC Subnet to launch instances in (optional)
#subnet_id = subnet-xxxxxx
# Security group ID to launch in (optional)
#security_group_id = sg-xxxxxx
# Name of public key that will be loaded by Amazon on to your EC2 instances.
# You can upload and name your public key using the EC2 Management Console.
# Only the user with this key will be able to SSH to the cluster.
# Name below should be your 'Key pair name' in EC2 and not name of your public key file.
key_name = my_aws_key
# Type of filesystem to format instance storage as.
fstype = ext3
# Force formatting of instance devices, even when they have an existing filesystem.
force_format = no
# Tags to add to instances (comma-separated key:value pairs)
#instance_tags = key1:value1,key2:value2
# Nodes will be given public IP addresses if true
associate_public_ip = true
# Path to file containing user data that will be executed at launch
#user_data_path = /path/to/user_data
# Shutdown instances after a delay (in minutes). If 0, no shutdown will occur.
shutdown_delay_minutes = 0
# Shutdown behavior of EC2 instances: terminate or stop
shutdown_behavior = stop
# Name of the Azure resource group to use. All resources are created within this
# resource group. If a resource group with this name already exists, it will be
# used, otherwise a new resource group with this name will be created and used.
resource_group = accumulo-rg
# Name of the Azure Virtual Network (VNET) to use. If a VNET with this name
# already exists, it will be used; else a new VNET with this name will be created.
vnet = vnet1
# The CIDR prefix used to create the virtual network (VNET).
vnet_cidr =
# A single subnet is created within the VNET and given the following name.
subnet = subnet1
# The CIDR prefix used for the single subnet within the virtual network.
subnet_cidr =
# Optional. If set to True, multiple VMSS will be created based on conf/azure_multiple_vmss_vars.yml
use_multiple_vmss = False
# The below "vmss_priority" controls whether Azure Spot is used (or not).
# When vmss_priority is set to Low, the cluster uses Azure Spot VM Scale Sets.
# When vmss_priority is set to Regular (which is the default), Azure Spot is NOT used.
# Note that for multiple-VMSS deployments, this setting can be overridden at a
# per-VMSS level by specifying vmss_priority in the conf/azure_multiple_vmss_vars.yml file.
vmss_priority = Regular
# Azure image reference defined as a pipe-delimited string in the format offer|publisher|sku|version|
# Please refer to the 'Launching an Azure cluster' section of the README before making changes
azure_image_reference = CentOS|OpenLogic|7.5|latest|
# Size of the cluster to provision.
# A virtual machine scale set (VMSS) with this many VMs will be created.
# The minimum allowed size for this is 3 nodes for non-HA & 4 nodes for HA setup
numnodes = 8
# The size of each virtual machine. See the following link for other sizes:
vm_sku = Standard_D8s_v3
# Each VM will be provisioned with the following type of managed disk
# The azure_disk_device* parameters below specify the Linux device paths Muchos looks for when selecting disks for storage
# The default values below are for using Azure managed disks
azure_disk_device_path = /dev/disk/azure/scsi1
azure_disk_device_pattern = lun*
# If using Azure Lsv2 VMs which have NVME disks for ephemeral storage, use the parameters below instead of the defaults
# azure_disk_device_path = /dev
# azure_disk_device_pattern = nvme*n1
# Type of the data disk attached to the VMSS. 'Standard_LRS' for HDD, 'Premium_LRS' for SSD, 'StandardSSD_LRS' for Standard SSD
data_disk_sku = Standard_LRS
# Number of managed disks provisioned on each VM
data_disk_count = 3
# The size of each managed disk provisioned
disk_size_gb = 128
# Indicates the host caching that should be used for data disks. Valid values are ReadOnly, ReadWrite, or None
data_disk_caching = ReadOnly
# Location to mount managed disks in each VM
mount_root = /var/data
# Location where the metrics data will be written
metrics_drive_root = var-data
# Optional proxy VM. If not set, the first node of the cluster will be selected as the proxy.
azure_proxy_host =
# Azure VM SKU to use when creating the proxy host - defaults to an 8-vCore general-purpose VM
azure_proxy_host_vm_sku = Standard_D8s_v3
# The Azure datacenter location to use for creating Muchos resources
location = westus2
# Enable ADLS Gen2 storage configuration. The Muchos parameters instance_volumes_input, instance_volumes_adls & adls_storage_type are not required if use_adlsg2 is False.
use_adlsg2 = False
# Storage accounts can be auto generated or manually specified. "|" is used as separator between manual and auto generated storage account names and must be specified
# Manual and Auto generated names are mutually exclusive
# Specifying storage accounts manually:
# |abfss://<container-name>@<storage-account-name>.<domain-name>/<folder-name>. Use commas to separate multiple entries
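# Example: |abfss://container1@storageaccount1.dfs.core.windows.net/accumulo,abfss://container2@storageaccount2.dfs.core.windows.net/accumulo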
# Specifying auto-generated storage accounts:
# <Number-of-Storage-Accounts>,<domain-name>|
# Example: 3,dfs.core.windows.net|
instance_volumes_input = 1,|
# Do not update "instance_volumes_adls", it will be populated dynamically during launch phase of muchos
instance_volumes_adls =
# Type of storage for ADLS Gen2 storage accounts
adls_storage_type = Standard_LRS
# Specify user assigned identity name. "{{ vmss_name }}-ua-msi" will be created if value is not provided
user_assigned_identity =
# Do not update "azure_tenant_id", it will be populated dynamically during launch phase of muchos
azure_tenant_id =
# Do not update "azure_client_id", it will be populated dynamically during launch phase of muchos
azure_client_id =
# Do not update "principal_id", it will be populated dynamically during launch phase of muchos when "use_hdfs = False"
principal_id =
# Optional Azure fileshare to mount on all nodes.
# Path and credentials must be updated to enable this.
#azure_fileshare_mount = /mnt/azure-fileshare
#azure_fileshare = //
#azure_fileshare_username = fs_username
#azure_fileshare_password = fs_password
# Optional integration with Azure Log Analytics
# Workspace ID and key must be updated to enable this.
# For details on how to get a workspace ID and key, see the link below
az_oms_integration_needed = False
#az_logs_id =
#az_logs_key =
# Root of data dirs
mount_root = /var/data
# Data directories on all nodes
data_dirs = /var/data1,/var/data2,/var/data3
# Identifies drives for metrics
metrics_drive_ids = var-data1,var-data2,var-data3
# Automatically tune Accumulo, Yarn, and Fluo performance settings by selecting or
# creating a performance profile. Try not to use more memory than each node has
# and leave some space for the OS.
# Below are different performance profiles that can be selected. Each profile
# has the same properties with different values.
# Amount of JVM heap for each tserver
# Amount of data cache for each tserver. Only applies when using Accumulo 1.x
# Amount of index cache for each tserver. Only applies when using Accumulo 1.x
# In memory map size for each tserver. Only applies when using Accumulo 1.x
# Amount of JVM heap for each Fluo worker
# Determines the gap between the Yarn memory limit and the java -Xmx setting.
# For example if fluo_worker_mem_mb is set to 2048 and twill_reserve_mem_mb is
# set to 256, then for workers the java -Xmx setting will be set to 2048-256.
# If yarn is killing worker processes because they are using too much memory,
# then consider increasing this setting.
# Number of threads for each Fluo worker
# Number of workers to run per node
# Max amount of memory for YARN per node
# Accumulo configs below only apply when using Accumulo 1.x
# Accumulo configs below only apply when using Accumulo 1.x
# This section is used to override Ansible variables. Any variable set below will be placed in the hosts file created by Muchos.
# Expected format: variable = value
# If cluster_type=existing, the list of nodes below needs to be manually populated with a list of cluster nodes in the following format:
# <Hostname> = <Service1>[,<Service2>,<Service3>]
# Where:
# Hostname = Must be unique. Will be used for hostname in EC2 or should match hostname on your own cluster
# Service = Service to run on node (possible values: zookeeper, namenode, resourcemanager, accumulomaster, client, swarmmanager,
# mesosmaster, worker, fluo, metrics, spark, elkserver). The following services are required: namenode, resourcemanager,
# accumulomaster, zookeeper & worker
# If cluster_type=azure, the list of nodes below is auto-generated by the launch action e.g. "muchos launch --cluster accumuloclstr"
# For the 'azure' cluster type, it is perfectly normal if the auto-generated list of node names is not sequential
# For viewing logging and statistics with Kibana, the elkserver service must be added to a leader node.
leader1 = namenode,resourcemanager,accumulomaster,zookeeper
leader2 = metrics
worker1 = worker,swarmmanager
worker2 = worker
worker3 = worker
worker4 = worker