| # Licensed to the Apache Software Foundation (ASF) under one or more |
| # contributor license agreements. See the NOTICE file distributed with |
| # this work for additional information regarding copyright ownership. |
| # The ASF licenses this file to You under the Apache License, Version 2.0 |
| # (the "License"); you may not use this file except in compliance with |
| # the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| [general] |
| # Cluster type (azure, ec2, or existing) |
| cluster_type = ec2 |
| # Cluster user name (install command will SSH to cluster using this user) |
| # Leave default below if launching cluster in AWS |
| cluster_user = centos |
| # Cluster user group |
| cluster_group = %(cluster_user)s |
| # Cluster user home directory |
| user_home = /home/%(cluster_user)s |
| # Install directory where Hadoop, Accumulo, etc will be installed |
| install_dir = %(user_home)s/install |
| # Hostname of proxy node that Muchos will use to direct installation of cluster. Will be given |
| # public IP if launching in EC2. If not launching in EC2, node must have public IP that can be reached |
| # from your machine. Hostname can be chosen from "nodes" section below. |
| proxy_hostname = leader1 |
| # If set, a SOCKS proxy will be created on the specified port when connecting to proxy using 'muchos ssh <cluster>' |
| #proxy_socks_port = 38585 |
| # Accumulo Instance name |
| accumulo_instance = muchos |
| # Accumulo Password |
| accumulo_password = secret |
| # Software versions (make sure you have a corresponding entry for the checksum in conf/checksums) |
| hadoop_version = 3.2.2 |
| zookeeper_version = 3.6.3 |
| spark_version = 2.4.8 |
| fluo_version = 1.2.0 |
| fluo_yarn_version = 1.0.0 |
| accumulo_version = 2.0.1 |
| # Specifies if software should be downloaded. If 'False', tarballs of the software above should be in conf/upload/ |
| download_software = True |
| # Install Hub (for GitHub) |
| install_hub = True |
| # The java package to install |
| java_package=java-1.8.0-openjdk-devel |
| # The package to use for java 11 |
| # java_package=java-11-openjdk-devel |
| # Please read the High-Availability section of the README before switching to 'True' |
| hdfs_ha = False |
| # Give a logical name for the cluster, all one word no special characters. Required to support HDFS HA. |
| nameservice_id = muchoshacluster |
| # Number of Accumulo tablet servers to run per host. This is only effective when use_systemd is set to 'True'. |
| # Supporting non-systemd cases would require changes to the accumulo-service script. |
| num_tservers = 1 |
| # If accumulo services are to be run under systemd, set this to 'True' |
| use_systemd = False |
| |
| [ec2] |
| # AWS machine image to use. The default below is for a CentOS 7 image (in us-east-1). |
| # You may need to change this value if a new image has been released or you are running in a different region. |
| # Before using this AMI, subscribe to it on the CentOS product page below or launching will fail: |
| # https://aws.amazon.com/marketplace/pp/B00O7WM7QW |
| aws_ami = ami-0affd4508a5d2481b |
| # Type of AWS instance launched by default |
| default_instance_type = m5d.large |
| # Type of AWS instance launched for any node running 'worker' service |
| # Leave default below to use same instance type set by 'default_instance_type' property |
| worker_instance_type = %(default_instance_type)s |
| # Enable template mode by selecting a template from conf/templates, in order to leverage your own |
| # custom EC2 launch requests (optional). See conf/templates/README.md for more information |
| #cluster_template = example |
| # VPC to launch instances in (optional) |
| #vpc_id = vpc-xxxxx |
| # VPC Subnet to launch instances in (optional) |
| #subnet_id = subnet-xxxxxx |
| # Security group ID to launch in (optional) |
| #security_group_id = sg-xxxxxx |
| # Name of public key that will be loaded by Amazon on to your EC2 instances. |
| # You can upload and name your public key using the EC2 Management Console. |
| # Only the user with this key will be able to SSH to the cluster. |
| # Name below should be your 'Key pair name' in EC2 and not name of your public key file. |
| key_name = my_aws_key |
| # Type of filesystem to format instance storage as. |
| fstype = ext3 |
| # Force formatting of instance devices, even when they have an existing filesystem. |
| force_format = no |
| # Tags to add to instances |
| #instance_tags = key1:value1,key2:value2 |
| # Nodes will be given public IP addresses if true |
| associate_public_ip = true |
| # Path to file containing user data that will be executed at launch |
| #user_data_path = /path/to/user_data |
| # Shutdown instances after a delay (in minutes). If 0, no shutdown will occur. |
| shutdown_delay_minutes = 0 |
| # Shutdown behavior of EC2 instances: terminate or stop |
| shutdown_behavior = stop |
| |
| [azure] |
| # Name of the Azure resource group to use. All resources are created within this |
| # resource group. If a resource group with this name already exists, it will be |
| # used, otherwise a new resource group with this name will be created and used. |
| resource_group = accumulo-rg |
| # Name of the Azure Virtual Network (VNET) to use. If a VNET with this name |
| # already exists, it will be used; else a new VNET with this name will be created. |
| vnet = vnet1 |
| # The CIDR prefix used to create the virtual network (VNET). |
| vnet_cidr = 10.0.0.0/8 |
| # A single subnet is created within the VNET and given the following name. |
| subnet = subnet1 |
| # The CIDR prefix used for the single subnet within the virtual network. |
| subnet_cidr = 10.1.0.0/16 |
| # Optional. If set to True, will create multiple VMSS based on conf/azure_multiple_vmss_vars.yml |
| use_multiple_vmss = False |
| # The below "vmss_priority" controls whether Azure Spot is used (or not). |
| # When vmss_priority is set to Low, the cluster uses Azure Spot VM Scale Sets. |
| # When vmss_priority is set to Regular (which is the default), Azure Spot is NOT used. |
| # Note that for multiple-VMSS deployments, this setting can be overridden at a |
| # per-VMSS level by specifying vmss_priority in the conf/azure_multiple_vmss_vars.yml file. |
| vmss_priority = Regular |
| # Azure image reference defined as a pipe-delimited string in the format offer|publisher|sku|version| |
| # Please refer to the 'Launching an Azure cluster' section of the README before making changes |
| azure_image_reference = CentOS|OpenLogic|7.5|latest| |
| # Size of the cluster to provision. |
| # A virtual machine scale set (VMSS) with this many VMs will be created. |
| # The minimum allowed size for this is 3 nodes for non-HA & 4 nodes for HA setup |
| numnodes = 8 |
| # The size of each virtual machine. See the following link for other sizes: |
| # https://docs.microsoft.com/en-us/azure/virtual-machines/linux/sizes-general |
| vm_sku = Standard_D8s_v3 |
| # Each VM will be provisioned with the following type of managed disk |
| # The azure_disk_device* parameters below specify the Linux device paths Muchos looks for when selecting disks for storage |
| # The default values below are for using Azure managed disks |
| azure_disk_device_path = /dev/disk/azure/scsi1 |
| azure_disk_device_pattern = lun* |
| # If using Azure Lsv2 VMs which have NVME disks for ephemeral storage, use the parameters below instead of the defaults |
| # azure_disk_device_path = /dev |
| # azure_disk_device_pattern = nvme*n1 |
| # Type of the data disk attached to the VMSS. 'Standard_LRS' for HDD, 'Premium_LRS' for SSD, 'StandardSSD_LRS' for Standard SSD |
| data_disk_sku = Standard_LRS |
| # Number of managed disks provisioned on each VM |
| data_disk_count = 3 |
| # The size of each managed disk provisioned |
| disk_size_gb = 128 |
| # Indicates the host caching that should be used for data disks. Valid values are ReadOnly, ReadWrite, or None |
| data_disk_caching = ReadOnly |
| # Location to mount managed disks in each VM |
| mount_root = /var/data |
| # Location where the metrics data will be written |
| metrics_drive_root = var-data |
| # Optional proxy VM. If not set, the first node of the cluster will be selected as the proxy. |
| azure_proxy_host = |
| # Azure VM SKU to use when creating the proxy host - defaults to an 8-vCore general-purpose VM |
| azure_proxy_host_vm_sku = Standard_D8s_v3 |
| # The Azure datacenter location to use for creating Muchos resources |
| location = westus2 |
| # Enable ADLS Gen2 storage configuration. Muchos parameters instance_volumes_input, instance_volumes_adls & adls_storage_type are not required if use_adlsg2 is false. |
| use_adlsg2 = False |
| # Storage accounts can be auto generated or manually specified. "|" is used as separator between manual and auto generated storage account names and must be specified |
| # Manual and Auto generated names are mutually exclusive |
| # |
| # Specifying storage accounts manually: |
| # "|abfss://<container-name>@<storage-account-name>.<domain-name>/<folder-name>". Use commas to separate multiple entries |
| # Example:|abfss://accumulodata@shnawastore1.dfs.core.windows.net/accumulo,abfss://accumulodata@shnawastore2.dfs.core.windows.net/accumulo |
| # |
| # Specifying auto-generated storage accounts: |
| # <Number-of-Storage-Accounts>,<domain-name>| |
| # Example: 3,dfs.core.windows.net| |
| instance_volumes_input = 1,dfs.core.windows.net| |
| # Do not update "instance_volumes_adls", it will be populated dynamically during launch phase of muchos |
| instance_volumes_adls = |
| # Type of storage for ADLS Gen2 storage accounts |
| adls_storage_type = Standard_LRS |
| # Specify user assigned identity name. "{{ vmss_name }}-ua-msi" will be created if value is not provided |
| user_assigned_identity = |
| # Do not update "azure_tenant_id", it will be populated dynamically during launch phase of muchos |
| azure_tenant_id = |
| # Do not update "azure_client_id", it will be populated dynamically during launch phase of muchos |
| azure_client_id = |
| # Do not update "principal_id", it will be populated dynamically during launch phase of muchos when "use_hdfs = False" |
| principal_id = |
| # Optional Azure fileshare to mount on all nodes. |
| # Path and credentials must be updated to enable this. |
| #azure_fileshare_mount = /mnt/azure-fileshare |
| #azure_fileshare = //fileshare-to-mount.file.core.windows.net/path |
| #azure_fileshare_username = fs_username |
| #azure_fileshare_password = fs_password |
| # Optional integration with Azure Log Analytics |
| # Workspace ID and key must be updated to enable this. |
| # For details on how to get a workspace ID and key, see the link below |
| # https://docs.microsoft.com/en-us/azure/azure-monitor/learn/quick-collect-linux-computer#obtain-workspace-id-and-key |
| az_oms_integration_needed = False |
| #az_logs_id = |
| #az_logs_key = |
| |
| [existing] |
| # Root of data dirs |
| mount_root = /var/data |
| # Data directories on all nodes |
| data_dirs = /var/data1,/var/data2,/var/data3 |
| # Identifies drives for metrics |
| metrics_drive_ids = var-data1,var-data2,var-data3 |
| |
| [performance] |
| # Automatically tune Accumulo, Yarn, and Fluo performance settings by selecting or |
| # creating a performance profile. Try not to use more memory than each node has |
| # and leave some space for the OS. |
| profile=perf-small |
| |
| # Below are different performance profiles that can be selected. Each profile |
| # has the same properties with different values. |
| |
| [perf-small] |
| # Amount of JVM heap for each tserver |
| accumulo_tserv_mem=2G |
| # Amount of data cache for each tserver. Only applies when using Accumulo 1.x |
| accumulo_dcache_size=768M |
| # Amount of index cache for each tserver. Only applies when using Accumulo 1.x |
| accumulo_icache_size=256M |
| # In memory map size for each tserver. Only applies when using Accumulo 1.x |
| accumulo_imap_size=512M |
| # Amount of JVM heap for each Fluo worker |
| fluo_worker_mem_mb=2048 |
| # Determines the gap between the Yarn memory limit and the java -Xmx setting. |
| # For example if fluo_worker_mem_mb is set to 2048 and twill_reserve_mem_mb is |
| # set to 256, then for workers the java -Xmx setting will be set to 2048-256. |
| # If yarn is killing worker processes because they are using too much memory, |
| # then consider increasing this setting. |
| twill_reserve_mem_mb=256 |
| # Number of threads for each Fluo worker |
| fluo_worker_threads=20 |
| # Number of workers to run per node |
| fluo_worker_instances_multiplier=1 |
| # Max amount of memory for YARN per node |
| yarn_nm_mem_mb=4096 |
| |
| [perf-medium] |
| accumulo_tserv_mem=3G |
| # Accumulo configs below only apply when using Accumulo 1.x |
| accumulo_dcache_size=1536M |
| accumulo_icache_size=512M |
| accumulo_imap_size=512M |
| fluo_worker_mem_mb=4096 |
| twill_reserve_mem_mb=512 |
| fluo_worker_threads=64 |
| fluo_worker_instances_multiplier=1 |
| yarn_nm_mem_mb=8192 |
| |
| [perf-large] |
| accumulo_tserv_mem=4G |
| # Accumulo configs below only apply when using Accumulo 1.x |
| accumulo_dcache_size=2G |
| accumulo_icache_size=1G |
| accumulo_imap_size=512M |
| fluo_worker_mem_mb=4096 |
| twill_reserve_mem_mb=512 |
| fluo_worker_threads=64 |
| fluo_worker_instances_multiplier=2 |
| yarn_nm_mem_mb=16384 |
| |
| [azd16s] |
| accumulo_tserv_mem=4G |
| accumulo_dcache_size=2G |
| accumulo_icache_size=1G |
| accumulo_imap_size=512M |
| fluo_worker_mem_mb=4096 |
| twill_reserve_mem_mb=512 |
| fluo_worker_threads=64 |
| fluo_worker_instances_multiplier=2 |
| yarn_nm_mem_mb=16384 |
| |
| [azd8s] |
| accumulo_tserv_mem=4G |
| accumulo_dcache_size=2G |
| accumulo_icache_size=1G |
| accumulo_imap_size=512M |
| fluo_worker_mem_mb=4096 |
| twill_reserve_mem_mb=512 |
| fluo_worker_threads=64 |
| fluo_worker_instances_multiplier=2 |
| yarn_nm_mem_mb=16384 |
| |
| [ansible-vars] |
| # This section is used to override Ansible variables. Any variable set below will be placed in the hosts file created by Muchos. |
| # Expected format: variable = value |
| |
| [nodes] |
| # If cluster_type=existing, the list of nodes below needs to be manually populated with a list of cluster nodes in the following format: |
| # <Hostname> = <Service1>[,<Service2>,<Service3>] |
| # Where: |
| # Hostname = Must be unique. Will be used for hostname in EC2 or should match hostname on your own cluster |
| # Service = Service to run on node (possible values: zookeeper, namenode, resourcemanager, accumulomaster, client, swarmmanager, |
| # mesosmaster, worker, fluo, metrics, spark, elkserver). The following services are required: namenode, resourcemanager, |
| # accumulomaster, zookeeper & worker |
| # If cluster_type=azure, the list of nodes below is auto-generated by the launch action e.g. "muchos launch --cluster accumuloclstr" |
| # For the 'azure' cluster type, it is perfectly normal if the auto-generated list of node names is not sequential |
| # For viewing logging and statistics with Kibana, the elkserver service must be added to a leader node. |
| leader1 = namenode,resourcemanager,accumulomaster,zookeeper |
| leader2 = metrics |
| worker1 = worker,swarmmanager |
| worker2 = worker |
| worker3 = worker |
| worker4 = worker |