# conf/zetten.props.example - fluo-muchos - Git at Google

 # Copyright 2014 Fluo authors (see AUTHORS)
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #    http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.

 [general]
 # Type of cloud service (ec2 or baremetal)
 cloud_provider = ec2
 # Cluster user name (install command will SSH to cluster using this user)
 # Leave default below if launching cluster in AWS
 cluster_user = centos
 # Cluster base directory where install/ & tarballs/ directories are created
 # Leave default below if launching cluster in AWS
 cluster_basedir = /home/centos
 # Hostname of proxy node that zetten will use to direct installation of cluster.  Will be given
 # public IP if launching in EC2.  If not launching in EC2, node must have public IP that can be reached
 # from your machine. Hostname can be chosen from "nodes" section below.
 proxy_hostname = leader1
 # Cluster network interface (leave default if launching in AWS)
 network_interface=eth0
 # If set, a SOCKS proxy will be created on the specified port when connecting to proxy using 'zetten ssh <cluster>'
 #proxy_socks_port = 38585
 # Accumulo Instance name
 accumulo_instance = instance17
 # Accumulo password
 accumulo_password = secret
 # Software versions
 fluo_version = 1.0.0-beta-3-SNAPSHOT
 hadoop_version = 2.6.3
 zookeeper_version = 3.4.8
 spark_version = 1.5.1
 spark_bin_version = 1.5.1-bin-hadoop2.6
 maven_version = 3.2.5
 accumulo_version = 1.7.1
 # Software sha256 checksums
 hadoop_sha256 = ada83d8c2ff72d4665ca2d70ce77af79bd57265beb3ce253cd2869b507e32152
 zookeeper_sha256 = f10a0b51f45c4f64c1fe69ef713abf9eb9571bc7385a82da892e83bb6c965e90
 spark_sha256 = 41ab59b28581b7952e3b0cfd8182980f033d2bf22d0f6a088ee6d120ddf24953
 maven_sha256 = 8c190264bdf591ff9f1268dc0ad940a2726f9e958e367716a09b8aaa7e74a755
 # Accumulo checksum is only checked if not a SNAPSHOT version.
 accumulo_sha256 = 8312f30fceb2318dcde5ee667e49f060087d9327e015084a51d51cbe5a7e9f95

 [ec2]
 # AWS Access & Secret keys
 aws_access_key = access_key
 aws_secret_key = secret_key
 # EC2 region to connect to
 region = us-east-1
 # Type of AWS instance launched by default
 default_instance_type = m3.large
 # Type of AWS instance launched for any node running 'worker' service
 # Leave default below to use same instance type set by 'default_instance_type' property
 worker_instance_type = %(default_instance_type)s
 # VPC to launch instances in (optional)
 #vpc_id = vpc-xxxxx
 # VPC Subnet to launch instances in (optional)
 #subnet_id = subnet-xxxxxx
 # Name of public key that will be loaded by Amazon on to your EC2 instances.
 # You can upload and name your public key using the EC2 Management Console.
 # Only the user with this key will be able to SSH to the cluster.
 # Name below should be your 'Key pair name' in EC2 and not name of your public key file.
 key_name = my_aws_key
 #Type of filesystem to format instance storage as.
 fstype = ext3
 #Force formatting of instance devices, even when they have an existing filesystem.
 force_format = no
 # Tags to add to instances
 #instance_tags = key1:value1,key2:value2
 # Overrides the default CentOS 7 AWS AMIs used by Zetten
 # Only set if you want to use a custom AMI (which should be derived from a CentOS 7 AMI)
 #aws_ami = ami-xxxxxxxx

 [baremetal]
 mount_root = /var/disk
 mounts = /var/disk01,/var/disk02,/var/disk03
 devices = /dev/hdb1,/dev/hdc1,/dev/hdd1
 metrics_drive_ids = var-disk01,var-disk02,var-disk03

 [performance]
 #Automatically tune Accumulo, Yarn, and Fluo performance settings by selecting or
 #creating a performance profile.  Try not to use more memory than each node has
 #and leave some space for the OS.
 profile=perf-small

 #Below are different performance profiles that can be selected.  Each profile
 #has the same properties with different values.

 [perf-small]
 #amount of JVM heap for each tserver
 accumulo_tserv_mem=2G
 #amount of data cache for each tserver
 accumulo_dcache_size=768M
 #amount of index cache for each tserver
 accumulo_icache_size=256M
 #in memory map size for each tserver
 accumulo_imap_size=512M
 #amount of JVM heap for each Fluo worker
 fluo_worker_mem_mb=2048
 #Number of threads for each Fluo worker
 fluo_worker_threads=20
 #Number of workers to run per node
 fluo_worker_instances_multiplier=1
 #Max amount of memory for YARN per node
 yarn_nm_mem_mb=4096

 [perf-medium]
 accumulo_tserv_mem=3G
 accumulo_dcache_size=1536M
 accumulo_icache_size=512M
 accumulo_imap_size=512M
 fluo_worker_mem_mb=4096
 fluo_worker_threads=64
 fluo_worker_instances_multiplier=1
 yarn_nm_mem_mb=8192

 [perf-large]
 accumulo_tserv_mem=4G
 accumulo_dcache_size=2G
 accumulo_icache_size=1G
 accumulo_imap_size=512M
 fluo_worker_mem_mb=4096
 fluo_worker_threads=64
 fluo_worker_instances_multiplier=2
 yarn_nm_mem_mb=16384

 [nodes]
 # Describes nodes in cluster in the following format:
 # <Hostname> = <Service1>[,<Service2>,<Service3>]
 # Where:
 #   Hostname = Must be unique.  Will be used for hostname in EC2 or should match hostname on your own cluster
 #   Service = Service to run on node (possible values: zookeeper, namenode, resourcemanager, accumulomaster, worker, fluo, metrics)
 #             All services are required below except for fluo & metrics which are optional
 leader1 = namenode,zookeeper,fluo
 leader2 = resourcemanager,zookeeper
 leader3 = accumulomaster,zookeeper
 metrics = metrics
 worker1 = worker
 worker2 = worker
 worker3 = worker
	# Copyright 2014 Fluo authors (see AUTHORS)
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	[general]
	# Type of cloud service (ec2 or baremetal)
	cloud_provider = ec2
	# Cluster user name (install command will SSH to cluster using this user)
	# Leave default below if launching cluster in AWS
	cluster_user = centos
	# Cluster base directory where install/ & tarballs/ directories are created
	# Leave default below if launching cluster in AWS
	cluster_basedir = /home/centos
	# Hostname of proxy node that zetten will use to direct installation of cluster. Will be given
	# public IP if launching in EC2. If not launching in EC2, node must have public IP that can be reached
	# from your machine. Hostname can be chosen from "nodes" section below.
	proxy_hostname = leader1
	# Cluster network interface (leave default if launching in AWS)
	network_interface=eth0
	# If set, a SOCKS proxy will be created on the specified port when connecting to proxy using 'zetten ssh <cluster>'
	#proxy_socks_port = 38585
	# Accumulo Instance name
	accumulo_instance = instance17
	# Accumulo password
	accumulo_password = secret
	# Software versions
	fluo_version = 1.0.0-beta-3-SNAPSHOT
	hadoop_version = 2.6.3
	zookeeper_version = 3.4.8
	spark_version = 1.5.1
	spark_bin_version = 1.5.1-bin-hadoop2.6
	maven_version = 3.2.5
	accumulo_version = 1.7.1
	# Software sha256 checksums
	hadoop_sha256 = ada83d8c2ff72d4665ca2d70ce77af79bd57265beb3ce253cd2869b507e32152
	zookeeper_sha256 = f10a0b51f45c4f64c1fe69ef713abf9eb9571bc7385a82da892e83bb6c965e90
	spark_sha256 = 41ab59b28581b7952e3b0cfd8182980f033d2bf22d0f6a088ee6d120ddf24953
	maven_sha256 = 8c190264bdf591ff9f1268dc0ad940a2726f9e958e367716a09b8aaa7e74a755
	# Accumulo checksum is only checked if not a SNAPSHOT version.
	accumulo_sha256 = 8312f30fceb2318dcde5ee667e49f060087d9327e015084a51d51cbe5a7e9f95

	[ec2]
	# AWS Access & Secret keys
	aws_access_key = access_key
	aws_secret_key = secret_key
	# EC2 region to connect to
	region = us-east-1
	# Type of AWS instance launched by default
	default_instance_type = m3.large
	# Type of AWS instance launched for any node running 'worker' service
	# Leave default below to use same instance type set by 'default_instance_type' property
	worker_instance_type = %(default_instance_type)s
	# VPC to launch instances in (optional)
	#vpc_id = vpc-xxxxx
	# VPC Subnet to launch instances in (optional)
	#subnet_id = subnet-xxxxxx
	# Name of public key that will be loaded by Amazon on to your EC2 instances.
	# You can upload and name your public key using the EC2 Management Console.
	# Only the user with this key will be able to SSH to the cluster.
	# Name below should be your 'Key pair name' in EC2 and not name of your public key file.
	key_name = my_aws_key
	#Type of filesystem to format instance storage as.
	fstype = ext3
	#Force formatting of instance devices, even when they have an existing filesystem.
	force_format = no
	# Tags to add to instances
	#instance_tags = key1:value1,key2:value2
	# Overrides the default CentOS 7 AWS AMIs used by Zetten
	# Only set if you want to use a custom AMI (which should be derived from a CentOS 7 AMI)
	#aws_ami = ami-xxxxxxxx

	[baremetal]
	mount_root = /var/disk
	mounts = /var/disk01,/var/disk02,/var/disk03
	devices = /dev/hdb1,/dev/hdc1,/dev/hdd1
	metrics_drive_ids = var-disk01,var-disk02,var-disk03

	[performance]
	#Automatically tune Accumulo, Yarn, and Fluo performance settings by selecting or
	#creating a performance profile. Try not to use more memory than each node has
	#and leave some space for the OS.
	profile=perf-small

	#Below are different performance profiles that can be selected. Each profile
	#has the same properties with different values.

	[perf-small]
	#amount of JVM heap for each tserver
	accumulo_tserv_mem=2G
	#amount of data cache for each tserver
	accumulo_dcache_size=768M
	#amount of index cache for each tserver
	accumulo_icache_size=256M
	#in memory map size for each tserver
	accumulo_imap_size=512M
	#amount of JVM heap for each Fluo worker
	fluo_worker_mem_mb=2048
	#Number of threads for each Fluo worker
	fluo_worker_threads=20
	#Number of workers to run per node
	fluo_worker_instances_multiplier=1
	#Max amount of memory for YARN per node
	yarn_nm_mem_mb=4096

	[perf-medium]
	accumulo_tserv_mem=3G
	accumulo_dcache_size=1536M
	accumulo_icache_size=512M
	accumulo_imap_size=512M
	fluo_worker_mem_mb=4096
	fluo_worker_threads=64
	fluo_worker_instances_multiplier=1
	yarn_nm_mem_mb=8192

	[perf-large]
	accumulo_tserv_mem=4G
	accumulo_dcache_size=2G
	accumulo_icache_size=1G
	accumulo_imap_size=512M
	fluo_worker_mem_mb=4096
	fluo_worker_threads=64
	fluo_worker_instances_multiplier=2
	yarn_nm_mem_mb=16384

	[nodes]
	# Describes nodes in cluster in the following format:
	# <Hostname> = <Service1>[,<Service2>,<Service3>]
	# Where:
	# Hostname = Must be unique. Will be used for hostname in EC2 or should match hostname on your own cluster
	# Service = Service to run on node (possible values: zookeeper, namenode, resourcemanager, accumulomaster, worker, fluo, metrics)
	# All services are required below except for fluo & metrics which are optional
	leader1 = namenode,zookeeper,fluo
	leader2 = resourcemanager,zookeeper
	leader3 = accumulomaster,zookeeper
	metrics = metrics
	worker1 = worker
	worker2 = worker
	worker3 = worker