blob: c33a2d4ad67190c585f479dd231becfe599aee27 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
[general]
# Cluster type (ec2 or existing)
cluster_type = ec2
# Cluster user name (install command will SSH to cluster using this user)
# Leave default below if launching cluster in AWS
cluster_user = centos
# Cluster user group
cluster_group = %(cluster_user)s
# Cluster user home directory
user_home = /home/%(cluster_user)s
# Install directory where Hadoop, Accumulo, etc will be installed
install_dir = %(user_home)s/install
# Hostname of proxy node that Muchos will use to direct installation of cluster. Will be given
# public IP if launching in EC2. If not launching in EC2, node must have public IP that can be reached
# from your machine. Hostname can be chosen from "nodes" section below.
proxy_hostname = leader1
# If set, a SOCKS proxy will be created on the specified port when connecting to proxy using 'muchos ssh <cluster>'
#proxy_socks_port = 38585
# Accumulo Instance name
accumulo_instance = muchos
# Accumluo Password
accumulo_password = secret
# Software versions (set sha-256 in conf/checksums)
hadoop_version = 3.1.1
zookeeper_version = 3.4.14
spark_version = 2.3.2
fluo_version = 1.2.0
fluo_yarn_version = 1.0.0
accumulo_version = 2.0.0-alpha-1
# Specifies if software should be downloaded. If 'False', tarballs of the software above should be in conf/upload/
download_software = True
# Install Hub (for GitHub)
install_hub = True
[ec2]
# AWS machine image to use. The default below is for a CentOS 7 image (in us-east-1).
# You may need to change this value if a new image has been released or you are running in a different region.
# Before using this AMI, subscribe to it on the CentOS product page below or launching will fail:
# https://aws.amazon.com/marketplace/pp/B00O7WM7QW
aws_ami = ami-9887c6e7
# Type of AWS instance launched by default
default_instance_type = m5d.large
# Type of AWS instance launched for any node running 'worker' service
# Leave default below to use same instance type set by 'default_instance_type' property
worker_instance_type = %(default_instance_type)s
# VPC to launch instances in (optional)
#vpc_id = vpc-xxxxx
# VPC Subnet to launch instances in (optional)
#subnet_id = subnet-xxxxxx
# Security group ID to launch in (optional)
#security_group_id = sg-xxxxxx
# Name of public key that will be loaded by Amazon on to your EC2 instances.
# You can upload and name your public key using the EC2 Management Console.
# Only the user with this key will be able to SSH to the cluster.
# Name below should be your 'Key pair name' in EC2 and not name of your public key file.
key_name = my_aws_key
# Type of filesystem to format instance storage as.
fstype = ext3
# Force formatting of instance devices, even when it has an existing filesystem.
force_format = no
# Tags to add instances
#instance_tags = key1:value1,key2:value2
# Nodes will be given public IP addresses if true
associate_public_ip = true
# Path to file containing user data that will be executed at launch
#user_data_path = /path/to/user_data
# Shutdown instances after a delay (in minutes). If 0, no shutdown will occur.
shutdown_delay_minutes = 0
# Shutdown behavior of EC2 instances: terminate or stop
shutdown_behavior = stop
[existing]
# Root of data dirs
mount_root = /var/data
# Data directories on all nodes
data_dirs = /var/data1,/var/data2,/var/data3
# Identifies drives for metrics
metrics_drive_ids = var-data1,var-data2,var-data3
[performance]
# Automatically tune Accumulo, Yarn, and Fluo performance setting by selecting or
# creating a performance profile. Try not to use more memory than each node has
# and leave some space for the OS.
profile=perf-small
# Below are different performance profiles that can be selected. Each profile
# has the same properties with different values.
[perf-small]
# Amount of JVM heap for each tserver
accumulo_tserv_mem=2G
# Amount of data cache for each tserver. Only applies when using Accumulo 1.x
accumulo_dcache_size=768M
# Amount of index cache for each tserver. Only applies when using Accumulo 1.x
accumulo_icache_size=256M
# In memory map size for each tserver. Only applies when using Accumulo 1.x
accumulo_imap_size=512M
# Amount of JVM heap for each Fluo worker
fluo_worker_mem_mb=2048
# Determines the gap between the Yarn memory limit and the java -Xmx setting.
# For example if fluo_worker_mem_mb is set to 2048 and twill_reserve_mem_mb is
# set to 256, then for workers the java -Xmx setting will be set to 2048-256.
# If yarn is killing worker processes because they are using too much memory,
# then consider increasing this setting.
twill_reserve_mem_mb=256
# Number of threads for each Flup worker
fluo_worker_threads=20
# Number of worker to run per node
fluo_worker_instances_multiplier=1
# Max amount of memory for YARN per node
yarn_nm_mem_mb=4096
[perf-medium]
accumulo_tserv_mem=3G
# Accumulo configs below only apply when using Accumulo 1.x
accumulo_dcache_size=1536M
accumulo_icache_size=512M
accumulo_imap_size=512M
fluo_worker_mem_mb=4096
twill_reserve_mem_mb=512
fluo_worker_threads=64
fluo_worker_instances_multiplier=1
yarn_nm_mem_mb=8192
[perf-large]
accumulo_tserv_mem=4G
# Accumulo configs below only apply when using Accumulo 1.x
accumulo_dcache_size=2G
accumulo_icache_size=1G
accumulo_imap_size=512M
fluo_worker_mem_mb=4096
twill_reserve_mem_mb=512
fluo_worker_threads=64
fluo_worker_instances_multiplier=2
yarn_nm_mem_mb=16384
[ansible-vars]
# This section is used to override Ansible variables. Any variable set below will be placed in the hosts file created by Muchos.
# Expected format: variable = value
[nodes]
# Describes nodes in cluster in the following format:
# <Hostname> = <Service1>[,<Service2>,<Service3>]
# Where:
# Hostname = Must be unique. Will be used for hostname in EC2 or should match hostname on your own cluster
# Service = Service to run on node (possible values: zookeeper, namenode, resourcemanager, accumulomaster, client, swarmmanager,
# mesosmaster, worker, fluo, metrics, spark). The following services are required: namenode, resourcemanager,
# accumulomaster, zookeeper & worker
leader1 = namenode,resourcemanager,accumulomaster,zookeeper
leader2 = metrics
worker1 = worker,swarmmanager
worker2 = worker
worker3 = worker
worker4 = worker