# conf/muchos.props.example - fluo-muchos - Git at Google

 # Licensed to the Apache Software Foundation (ASF) under one or more
 # contributor license agreements.  See the NOTICE file distributed with
 # this work for additional information regarding copyright ownership.
 # The ASF licenses this file to You under the Apache License, Version 2.0
 # (the "License"); you may not use this file except in compliance with
 # the License.  You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.

 [general]
 # Cluster type (ec2 or existing)
 cluster_type = ec2
 # Cluster user name (install command will SSH to cluster using this user)
 # Leave default below if launching cluster in AWS
 cluster_user = centos
 # Cluster user group
 cluster_group = %(cluster_user)s
 # Cluster user home directory
 user_home = /home/%(cluster_user)s
 # Install directory where Hadoop, Accumulo, etc will be installed
 install_dir = %(user_home)s/install
 # Hostname of proxy node that Muchos will use to direct installation of cluster.  Will be given
 # public IP if launching in EC2.  If not launching in EC2, node must have public IP that can be reached
 # from your machine. Hostname can be chosen from "nodes" section below.
 proxy_hostname = leader1
 # If set, a SOCKS proxy will be created on the specified port when connecting to proxy using 'muchos ssh <cluster>'
 #proxy_socks_port = 38585
 # Accumulo Instance name
 accumulo_instance = muchos
 # Accumulo password
 accumulo_password = secret
 # Software versions (set sha-256 in conf/checksums)
 hadoop_version = 3.1.1
 zookeeper_version = 3.4.14
 spark_version = 2.3.2
 fluo_version = 1.2.0
 fluo_yarn_version = 1.0.0
 accumulo_version = 2.0.0-alpha-1
 # Specifies if software should be downloaded. If 'False', tarballs of the software above should be in conf/upload/
 download_software = True
 # Install Hub (for GitHub)
 install_hub = True

 [ec2]
 # AWS machine image to use. The default below is for a CentOS 7 image (in us-east-1).
 # You may need to change this value if a new image has been released or you are running in a different region.
 # Before using this AMI, subscribe to it on the CentOS product page below or launching will fail:
 #   https://aws.amazon.com/marketplace/pp/B00O7WM7QW
 aws_ami = ami-9887c6e7
 # Type of AWS instance launched by default
 default_instance_type = m5d.large
 # Type of AWS instance launched for any node running 'worker' service
 # Leave default below to use same instance type set by 'default_instance_type' property
 worker_instance_type = %(default_instance_type)s
 # VPC to launch instances in (optional)
 #vpc_id = vpc-xxxxx
 # VPC Subnet to launch instances in (optional)
 #subnet_id = subnet-xxxxxx
 # Security group ID to launch in (optional)
 #security_group_id = sg-xxxxxx
 # Name of public key that will be loaded by Amazon on to your EC2 instances.
 # You can upload and name your public key using the EC2 Management Console.
 # Only the user with this key will be able to SSH to the cluster.
 # Name below should be your 'Key pair name' in EC2 and not name of your public key file.
 key_name = my_aws_key
 # Type of filesystem to format instance storage as.
 fstype = ext3
 # Force formatting of instance devices, even when they have an existing filesystem.
 force_format = no
 # Tags to add to instances
 #instance_tags = key1:value1,key2:value2
 # Nodes will be given public IP addresses if true
 associate_public_ip = true
 # Path to file containing user data that will be executed at launch
 #user_data_path = /path/to/user_data
 # Shutdown instances after a delay (in minutes). If 0, no shutdown will occur.
 shutdown_delay_minutes = 0
 # Shutdown behavior of EC2 instances: terminate or stop
 shutdown_behavior = stop

 [existing]
 # Root of data dirs
 mount_root = /var/data
 # Data directories on all nodes
 data_dirs = /var/data1,/var/data2,/var/data3
 # Identifies drives for metrics
 metrics_drive_ids = var-data1,var-data2,var-data3

 [performance]
 # Automatically tune Accumulo, Yarn, and Fluo performance settings by selecting or
 # creating a performance profile.  Try not to use more memory than each node has
 # and leave some space for the OS.
 profile=perf-small

 # Below are different performance profiles that can be selected.  Each profile
 # has the same properties with different values.

 [perf-small]
 # Amount of JVM heap for each tserver
 accumulo_tserv_mem=2G
 # Amount of data cache for each tserver. Only applies when using Accumulo 1.x
 accumulo_dcache_size=768M
 # Amount of index cache for each tserver. Only applies when using Accumulo 1.x
 accumulo_icache_size=256M
 # In memory map size for each tserver. Only applies when using Accumulo 1.x
 accumulo_imap_size=512M
 # Amount of JVM heap for each Fluo worker
 fluo_worker_mem_mb=2048
 # Determines the gap between the Yarn memory limit and the java -Xmx setting.
 # For example if fluo_worker_mem_mb is set to 2048 and twill_reserve_mem_mb is
 # set to 256, then for workers the java -Xmx setting will be set to 2048-256.
 # If yarn is killing worker processes because they are using too much memory,
 # then consider increasing this setting.
 twill_reserve_mem_mb=256
 # Number of threads for each Fluo worker
 fluo_worker_threads=20
 # Number of workers to run per node
 fluo_worker_instances_multiplier=1
 # Max amount of memory for YARN per node
 yarn_nm_mem_mb=4096

 [perf-medium]
 accumulo_tserv_mem=3G
 # Accumulo configs below only apply when using Accumulo 1.x
 accumulo_dcache_size=1536M
 accumulo_icache_size=512M
 accumulo_imap_size=512M
 fluo_worker_mem_mb=4096
 twill_reserve_mem_mb=512
 fluo_worker_threads=64
 fluo_worker_instances_multiplier=1
 yarn_nm_mem_mb=8192

 [perf-large]
 accumulo_tserv_mem=4G
 # Accumulo configs below only apply when using Accumulo 1.x
 accumulo_dcache_size=2G
 accumulo_icache_size=1G
 accumulo_imap_size=512M
 fluo_worker_mem_mb=4096
 twill_reserve_mem_mb=512
 fluo_worker_threads=64
 fluo_worker_instances_multiplier=2
 yarn_nm_mem_mb=16384

 [ansible-vars]
 # This section is used to override Ansible variables. Any variable set below will be placed in the hosts file created by Muchos.
 # Expected format:  variable = value

 [nodes]
 # Describes nodes in cluster in the following format:
 # <Hostname> = <Service1>[,<Service2>,<Service3>]
 # Where:
 #   Hostname = Must be unique.  Will be used for hostname in EC2 or should match hostname on your own cluster
 #   Service = Service to run on node (possible values: zookeeper, namenode, resourcemanager, accumulomaster, client, swarmmanager,
 #             mesosmaster, worker, fluo, metrics, spark). The following services are required: namenode, resourcemanager,
 #             accumulomaster, zookeeper & worker
 leader1 = namenode,resourcemanager,accumulomaster,zookeeper
 leader2 = metrics
 worker1 = worker,swarmmanager
 worker2 = worker
 worker3 = worker
 worker4 = worker
	# Licensed to the Apache Software Foundation (ASF) under one or more
	# contributor license agreements. See the NOTICE file distributed with
	# this work for additional information regarding copyright ownership.
	# The ASF licenses this file to You under the Apache License, Version 2.0
	# (the "License"); you may not use this file except in compliance with
	# the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	[general]
	# Cluster type (ec2 or existing)
	cluster_type = ec2
	# Cluster user name (install command will SSH to cluster using this user)
	# Leave default below if launching cluster in AWS
	cluster_user = centos
	# Cluster user group
	cluster_group = %(cluster_user)s
	# Cluster user home directory
	user_home = /home/%(cluster_user)s
	# Install directory where Hadoop, Accumulo, etc will be installed
	install_dir = %(user_home)s/install
	# Hostname of proxy node that Muchos will use to direct installation of cluster. Will be given
	# public IP if launching in EC2. If not launching in EC2, node must have public IP that can be reached
	# from your machine. Hostname can be chosen from "nodes" section below.
	proxy_hostname = leader1
	# If set, a SOCKS proxy will be created on the specified port when connecting to proxy using 'muchos ssh <cluster>'
	#proxy_socks_port = 38585
	# Accumulo Instance name
	accumulo_instance = muchos
	# Accumulo password
	accumulo_password = secret
	# Software versions (set sha-256 in conf/checksums)
	hadoop_version = 3.1.1
	zookeeper_version = 3.4.14
	spark_version = 2.3.2
	fluo_version = 1.2.0
	fluo_yarn_version = 1.0.0
	accumulo_version = 2.0.0-alpha-1
	# Specifies if software should be downloaded. If 'False', tarballs of the software above should be in conf/upload/
	download_software = True
	# Install Hub (for GitHub)
	install_hub = True

	[ec2]
	# AWS machine image to use. The default below is for a CentOS 7 image (in us-east-1).
	# You may need to change this value if a new image has been released or you are running in a different region.
	# Before using this AMI, subscribe to it on the CentOS product page below or launching will fail:
	# https://aws.amazon.com/marketplace/pp/B00O7WM7QW
	aws_ami = ami-9887c6e7
	# Type of AWS instance launched by default
	default_instance_type = m5d.large
	# Type of AWS instance launched for any node running 'worker' service
	# Leave default below to use same instance type set by 'default_instance_type' property
	worker_instance_type = %(default_instance_type)s
	# VPC to launch instances in (optional)
	#vpc_id = vpc-xxxxx
	# VPC Subnet to launch instances in (optional)
	#subnet_id = subnet-xxxxxx
	# Security group ID to launch in (optional)
	#security_group_id = sg-xxxxxx
	# Name of public key that will be loaded by Amazon on to your EC2 instances.
	# You can upload and name your public key using the EC2 Management Console.
	# Only the user with this key will be able to SSH to the cluster.
	# Name below should be your 'Key pair name' in EC2 and not name of your public key file.
	key_name = my_aws_key
	# Type of filesystem to format instance storage as.
	fstype = ext3
	# Force formatting of instance devices, even when they have an existing filesystem.
	force_format = no
	# Tags to add to instances
	#instance_tags = key1:value1,key2:value2
	# Nodes will be given public IP addresses if true
	associate_public_ip = true
	# Path to file containing user data that will be executed at launch
	#user_data_path = /path/to/user_data
	# Shutdown instances after a delay (in minutes). If 0, no shutdown will occur.
	shutdown_delay_minutes = 0
	# Shutdown behavior of EC2 instances: terminate or stop
	shutdown_behavior = stop

	[existing]
	# Root of data dirs
	mount_root = /var/data
	# Data directories on all nodes
	data_dirs = /var/data1,/var/data2,/var/data3
	# Identifies drives for metrics
	metrics_drive_ids = var-data1,var-data2,var-data3

	[performance]
	# Automatically tune Accumulo, Yarn, and Fluo performance settings by selecting or
	# creating a performance profile. Try not to use more memory than each node has
	# and leave some space for the OS.
	profile=perf-small

	# Below are different performance profiles that can be selected. Each profile
	# has the same properties with different values.

	[perf-small]
	# Amount of JVM heap for each tserver
	accumulo_tserv_mem=2G
	# Amount of data cache for each tserver. Only applies when using Accumulo 1.x
	accumulo_dcache_size=768M
	# Amount of index cache for each tserver. Only applies when using Accumulo 1.x
	accumulo_icache_size=256M
	# In memory map size for each tserver. Only applies when using Accumulo 1.x
	accumulo_imap_size=512M
	# Amount of JVM heap for each Fluo worker
	fluo_worker_mem_mb=2048
	# Determines the gap between the Yarn memory limit and the java -Xmx setting.
	# For example if fluo_worker_mem_mb is set to 2048 and twill_reserve_mem_mb is
	# set to 256, then for workers the java -Xmx setting will be set to 2048-256.
	# If yarn is killing worker processes because they are using too much memory,
	# then consider increasing this setting.
	twill_reserve_mem_mb=256
	# Number of threads for each Fluo worker
	fluo_worker_threads=20
	# Number of workers to run per node
	fluo_worker_instances_multiplier=1
	# Max amount of memory for YARN per node
	yarn_nm_mem_mb=4096

	[perf-medium]
	accumulo_tserv_mem=3G
	# Accumulo configs below only apply when using Accumulo 1.x
	accumulo_dcache_size=1536M
	accumulo_icache_size=512M
	accumulo_imap_size=512M
	fluo_worker_mem_mb=4096
	twill_reserve_mem_mb=512
	fluo_worker_threads=64
	fluo_worker_instances_multiplier=1
	yarn_nm_mem_mb=8192

	[perf-large]
	accumulo_tserv_mem=4G
	# Accumulo configs below only apply when using Accumulo 1.x
	accumulo_dcache_size=2G
	accumulo_icache_size=1G
	accumulo_imap_size=512M
	fluo_worker_mem_mb=4096
	twill_reserve_mem_mb=512
	fluo_worker_threads=64
	fluo_worker_instances_multiplier=2
	yarn_nm_mem_mb=16384

	[ansible-vars]
	# This section is used to override Ansible variables. Any variable set below will be placed in the hosts file created by Muchos.
	# Expected format: variable = value

	[nodes]
	# Describes nodes in cluster in the following format:
	# <Hostname> = <Service1>[,<Service2>,<Service3>]
	# Where:
	# Hostname = Must be unique. Will be used for hostname in EC2 or should match hostname on your own cluster
	# Service = Service to run on node (possible values: zookeeper, namenode, resourcemanager, accumulomaster, client, swarmmanager,
	# mesosmaster, worker, fluo, metrics, spark). The following services are required: namenode, resourcemanager,
	# accumulomaster, zookeeper & worker
	leader1 = namenode,resourcemanager,accumulomaster,zookeeper
	leader2 = metrics
	worker1 = worker,swarmmanager
	worker2 = worker
	worker3 = worker
	worker4 = worker