# conf/fluo-deploy.props.example - fluo-muchos - Git at Google

 # Copyright 2014 Fluo authors (see AUTHORS)
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #    http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.

 [general]
 # Cloud service the cluster runs on (ec2 or baremetal)
 cloud_provider = ec2
 # User name on the cluster; the install command connects over SSH as this user.
 # Keep the default below when launching the cluster in AWS.
 cluster_user = centos
 # Base directory on the cluster in which the install/ and tarballs/ directories
 # are created. Keep the default below when launching the cluster in AWS.
 cluster_basedir = /home/centos
 # Hostname of the proxy node that fluo-deploy uses to direct installation of the cluster.
 # When launching in EC2 this node is given a public IP. Otherwise, the node must already
 # have a public IP that can be reached from your machine.
 # The hostname can be chosen from the [nodes] section below.
 proxy_hostname = leader1
 # Network interface of the cluster (keep the default when launching in AWS)
 network_interface = eth0
 # If set, a SOCKS proxy is created on the given port when connecting to the proxy
 # with 'fluo-deploy ssh <cluster>'
 #proxy_socks_port = 38585
 # Apache mirror from which Hadoop, Zookeeper and Accumulo are downloaded
 apache_mirror = http://www.gtlib.gatech.edu/pub/apache
 # Accumulo instance name
 accumulo_instance = instance16
 # Accumulo password
 accumulo_password = secret
 # Versions of the software to install
 fluo_version = 1.0.0-beta-2
 hadoop_version = 2.6.3
 zookeeper_version = 3.4.6
 spark_version = 1.5.1
 spark_bin_version = 1.5.1-bin-hadoop2.6
 maven_version = 3.2.5
 accumulo_version = 1.6.4
 # SHA-256 checksums of the software downloads
 hadoop_sha256 = ada83d8c2ff72d4665ca2d70ce77af79bd57265beb3ce253cd2869b507e32152
 zookeeper_sha256 = 01b3938547cd620dc4c93efe07c0360411f4a66962a70500b163b59014046994
 spark_sha256 = 41ab59b28581b7952e3b0cfd8182980f033d2bf22d0f6a088ee6d120ddf24953
 maven_sha256 = 8c190264bdf591ff9f1268dc0ad940a2726f9e958e367716a09b8aaa7e74a755
 # The Accumulo checksum is verified only when accumulo_version is not a SNAPSHOT version.
 accumulo_sha256 = 6853be86dbf60a0e1efb1431e78a99fc1fc98c9a8fa1feae655def2ee33b30ff

 [ec2]
 # AWS access and secret keys (replace the placeholder values with your own)
 aws_access_key = access_key
 aws_secret_key = secret_key
 # EC2 region to connect to
 region = us-east-1
 # Type of AWS instance launched by default
 default_instance_type = m3.large
 # Type of AWS instance launched for any node running the 'worker' service.
 # Leave the default below to reuse the instance type set by 'default_instance_type'.
 worker_instance_type = %(default_instance_type)s
 # VPC to launch instances in (optional)
 #vpc_id = vpc-xxxxx
 # VPC subnet to launch instances in (optional)
 #subnet_id = subnet-xxxxxx
 # Name of the public key that Amazon will load onto your EC2 instances.
 # You can upload and name your public key using the EC2 Management Console.
 # Only the user holding this key will be able to SSH to the cluster.
 key_name = my_aws_key
 # Type of filesystem to format instance storage as
 fstype = ext3
 # Force formatting of instance devices, even when a device already has a filesystem
 force_format = no
 # Tags to add to instances, as comma-separated key:value pairs
 #instance_tags = key1:value1,key2:value2


 [baremetal]
 # NOTE(review): presumably these settings apply when cloud_provider = baremetal
 # in [general] - confirm against the fluo-deploy code.
 # Each list below has three entries; presumably position i in every list refers
 # to the same disk - confirm.
 # Common path prefix of the mount points listed in 'mounts' (in this example)
 mount_root = /var/disk
 # Comma-separated mount points (presumably for the cluster's data disks - confirm)
 mounts = /var/disk01,/var/disk02,/var/disk03
 # Comma-separated block devices (presumably mounted at the corresponding 'mounts' entry - confirm)
 devices = /dev/hdb1,/dev/hdc1,/dev/hdd1
 # Comma-separated drive IDs for metrics. In this example each ID is the matching mount
 # path with the leading '/' dropped and the remaining '/' replaced by '-'.
 metrics_drive_ids = var-disk01,var-disk02,var-disk03

 [performance]
 # Accumulo, YARN and Fluo performance settings are tuned automatically from the
 # performance profile named here. Select one of the profiles below or create
 # your own. Try not to use more memory than each node has, and leave some
 # space for the OS.
 profile = perf-small

 #Below are different performance profiles that can be selected.  Each profile
 #has the same properties with different values.

 [perf-small]
 # JVM heap for each tserver
 accumulo_tserv_mem = 2G
 # Data cache for each tserver
 accumulo_dcache_size = 768M
 # Index cache for each tserver
 accumulo_icache_size = 256M
 # In-memory map size for each tserver
 accumulo_imap_size = 512M
 # JVM heap for each Fluo worker
 fluo_worker_mem_mb = 2048
 # Number of threads for each Fluo worker
 fluo_worker_threads = 20
 # Number of workers to run per node
 fluo_worker_instances_multiplier = 1
 # Maximum amount of memory for YARN per node
 yarn_nm_mem_mb = 4096

 [perf-medium]
 # JVM heap for each tserver
 accumulo_tserv_mem = 3G
 # Data cache for each tserver
 accumulo_dcache_size = 1536M
 # Index cache for each tserver
 accumulo_icache_size = 512M
 # In-memory map size for each tserver
 accumulo_imap_size = 512M
 # JVM heap for each Fluo worker
 fluo_worker_mem_mb = 4096
 # Number of threads for each Fluo worker
 fluo_worker_threads = 64
 # Number of workers to run per node
 fluo_worker_instances_multiplier = 1
 # Maximum amount of memory for YARN per node
 yarn_nm_mem_mb = 8192

 [perf-large]
 # JVM heap for each tserver
 accumulo_tserv_mem = 4G
 # Data cache for each tserver
 accumulo_dcache_size = 2G
 # Index cache for each tserver
 accumulo_icache_size = 1G
 # In-memory map size for each tserver
 accumulo_imap_size = 512M
 # JVM heap for each Fluo worker
 fluo_worker_mem_mb = 4096
 # Number of threads for each Fluo worker
 fluo_worker_threads = 64
 # Number of workers to run per node
 fluo_worker_instances_multiplier = 2
 # Maximum amount of memory for YARN per node
 yarn_nm_mem_mb = 16384

 [apps]
 ###
 # This section configures Fluo applications. Each application is identified by a unique
 # name (which replaces <app> below) and has the following configuration settings:
 #
 # <app>_repo                # Git repo containing the application code; cloned when the application is run
 # <app>_branch              # Branch to check out in the git repo
 # <app>_command             # Script in the repo that starts the application
 #
 # Applications are run with the command 'fluo-deploy run -a <app>'.
 # Arguments can be added after <app> and will be passed to the configured command for that app.
 # Any files or directories referenced by arguments must exist on the leader node.

 # Stress test application
 # Usage: fluo-deploy run -a stress
 # Notes: - The stress test can take up to one hour to complete
 #        - Status will be output to the console
 #        - Logs are written to install/tests/stress/logs
 stress_repo = https://github.com/fluo-io/fluo-stress
 stress_branch = master
 stress_command = bin/run-test.sh

 # Phrasecount application
 # Usage: fluo-deploy run -a phrasecount --args <txtDocsDir>
 # Notes: - The directory <txtDocsDir> must exist on the leader node
 #        - The cluster cannot be set up with multiple zookeepers until FLUO-521 is resolved.
 phrasecount_repo = https://github.com/fluo-io/phrasecount
 phrasecount_branch = master
 phrasecount_command = bin/run.sh

 # Webindex application
 # Usage: fluo-deploy run -a webindex --args <args>
 # Notes: - If setting multiple args, wrap them in quotes
 #        - Set <args> to 'test-id load1 -fg' to load 1 Common Crawl file.
 webindex_repo = https://github.com/fluo-io/webindex
 webindex_branch = master
 webindex_command = bin/webindex

 [nodes]
 # Describes the nodes in the cluster, one per line, in the following format:
 # <Hostname> = <Service1>[,<Service2>,<Service3>]
 # Where:
 #   Hostname = Must be unique. Used as the hostname in EC2, or should match the hostname on your own cluster.
 #   Service  = Service to run on the node (possible values: zookeeper, namenode, resourcemanager,
 #              accumulomaster, worker, fluo, metrics).
 #              Every service must appear in this section except fluo and metrics, which are optional.
 leader1 = namenode,zookeeper,fluo
 leader2 = resourcemanager,zookeeper
 leader3 = accumulomaster,zookeeper
 metrics = metrics
 worker1 = worker
 worker2 = worker
 worker3 = worker
	# Copyright 2014 Fluo authors (see AUTHORS)
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	[general]
	# Cloud service the cluster runs on (ec2 or baremetal)
	cloud_provider = ec2
	# User name on the cluster; the install command connects over SSH as this user.
	# Keep the default below when launching the cluster in AWS.
	cluster_user = centos
	# Base directory on the cluster in which the install/ and tarballs/ directories
	# are created. Keep the default below when launching the cluster in AWS.
	cluster_basedir = /home/centos
	# Hostname of the proxy node that fluo-deploy uses to direct installation of the cluster.
	# When launching in EC2 this node is given a public IP. Otherwise, the node must already
	# have a public IP that can be reached from your machine.
	# The hostname can be chosen from the [nodes] section below.
	proxy_hostname = leader1
	# Network interface of the cluster (keep the default when launching in AWS)
	network_interface = eth0
	# If set, a SOCKS proxy is created on the given port when connecting to the proxy
	# with 'fluo-deploy ssh <cluster>'
	#proxy_socks_port = 38585
	# Apache mirror from which Hadoop, Zookeeper and Accumulo are downloaded
	apache_mirror = http://www.gtlib.gatech.edu/pub/apache
	# Accumulo instance name
	accumulo_instance = instance16
	# Accumulo password
	accumulo_password = secret
	# Versions of the software to install
	fluo_version = 1.0.0-beta-2
	hadoop_version = 2.6.3
	zookeeper_version = 3.4.6
	spark_version = 1.5.1
	spark_bin_version = 1.5.1-bin-hadoop2.6
	maven_version = 3.2.5
	accumulo_version = 1.6.4
	# SHA-256 checksums of the software downloads
	hadoop_sha256 = ada83d8c2ff72d4665ca2d70ce77af79bd57265beb3ce253cd2869b507e32152
	zookeeper_sha256 = 01b3938547cd620dc4c93efe07c0360411f4a66962a70500b163b59014046994
	spark_sha256 = 41ab59b28581b7952e3b0cfd8182980f033d2bf22d0f6a088ee6d120ddf24953
	maven_sha256 = 8c190264bdf591ff9f1268dc0ad940a2726f9e958e367716a09b8aaa7e74a755
	# The Accumulo checksum is verified only when accumulo_version is not a SNAPSHOT version.
	accumulo_sha256 = 6853be86dbf60a0e1efb1431e78a99fc1fc98c9a8fa1feae655def2ee33b30ff

	[ec2]
	# AWS access and secret keys (replace the placeholder values with your own)
	aws_access_key = access_key
	aws_secret_key = secret_key
	# EC2 region to connect to
	region = us-east-1
	# Type of AWS instance launched by default
	default_instance_type = m3.large
	# Type of AWS instance launched for any node running the 'worker' service.
	# Leave the default below to reuse the instance type set by 'default_instance_type'.
	worker_instance_type = %(default_instance_type)s
	# VPC to launch instances in (optional)
	#vpc_id = vpc-xxxxx
	# VPC subnet to launch instances in (optional)
	#subnet_id = subnet-xxxxxx
	# Name of the public key that Amazon will load onto your EC2 instances.
	# You can upload and name your public key using the EC2 Management Console.
	# Only the user holding this key will be able to SSH to the cluster.
	key_name = my_aws_key
	# Type of filesystem to format instance storage as
	fstype = ext3
	# Force formatting of instance devices, even when a device already has a filesystem
	force_format = no
	# Tags to add to instances, as comma-separated key:value pairs
	#instance_tags = key1:value1,key2:value2


	[baremetal]
	# NOTE(review): presumably these settings apply when cloud_provider = baremetal
	# in [general] - confirm against the fluo-deploy code.
	# Each list below has three entries; presumably position i in every list refers
	# to the same disk - confirm.
	# Common path prefix of the mount points listed in 'mounts' (in this example)
	mount_root = /var/disk
	# Comma-separated mount points (presumably for the cluster's data disks - confirm)
	mounts = /var/disk01,/var/disk02,/var/disk03
	# Comma-separated block devices (presumably mounted at the corresponding 'mounts' entry - confirm)
	devices = /dev/hdb1,/dev/hdc1,/dev/hdd1
	# Comma-separated drive IDs for metrics. In this example each ID is the matching mount
	# path with the leading '/' dropped and the remaining '/' replaced by '-'.
	metrics_drive_ids = var-disk01,var-disk02,var-disk03

	[performance]
	# Accumulo, YARN and Fluo performance settings are tuned automatically from the
	# performance profile named here. Select one of the profiles below or create
	# your own. Try not to use more memory than each node has, and leave some
	# space for the OS.
	profile = perf-small

	#Below are different performance profiles that can be selected. Each profile
	#has the same properties with different values.

	[perf-small]
	# JVM heap for each tserver
	accumulo_tserv_mem = 2G
	# Data cache for each tserver
	accumulo_dcache_size = 768M
	# Index cache for each tserver
	accumulo_icache_size = 256M
	# In-memory map size for each tserver
	accumulo_imap_size = 512M
	# JVM heap for each Fluo worker
	fluo_worker_mem_mb = 2048
	# Number of threads for each Fluo worker
	fluo_worker_threads = 20
	# Number of workers to run per node
	fluo_worker_instances_multiplier = 1
	# Maximum amount of memory for YARN per node
	yarn_nm_mem_mb = 4096

	[perf-medium]
	# JVM heap for each tserver
	accumulo_tserv_mem = 3G
	# Data cache for each tserver
	accumulo_dcache_size = 1536M
	# Index cache for each tserver
	accumulo_icache_size = 512M
	# In-memory map size for each tserver
	accumulo_imap_size = 512M
	# JVM heap for each Fluo worker
	fluo_worker_mem_mb = 4096
	# Number of threads for each Fluo worker
	fluo_worker_threads = 64
	# Number of workers to run per node
	fluo_worker_instances_multiplier = 1
	# Maximum amount of memory for YARN per node
	yarn_nm_mem_mb = 8192

	[perf-large]
	# JVM heap for each tserver
	accumulo_tserv_mem = 4G
	# Data cache for each tserver
	accumulo_dcache_size = 2G
	# Index cache for each tserver
	accumulo_icache_size = 1G
	# In-memory map size for each tserver
	accumulo_imap_size = 512M
	# JVM heap for each Fluo worker
	fluo_worker_mem_mb = 4096
	# Number of threads for each Fluo worker
	fluo_worker_threads = 64
	# Number of workers to run per node
	fluo_worker_instances_multiplier = 2
	# Maximum amount of memory for YARN per node
	yarn_nm_mem_mb = 16384

	[apps]
	###
	# This section configures Fluo applications. Each application is identified by a unique
	# name (which replaces <app> below) and has the following configuration settings:
	#
	# <app>_repo                # Git repo containing the application code; cloned when the application is run
	# <app>_branch              # Branch to check out in the git repo
	# <app>_command             # Script in the repo that starts the application
	#
	# Applications are run with the command 'fluo-deploy run -a <app>'.
	# Arguments can be added after <app> and will be passed to the configured command for that app.
	# Any files or directories referenced by arguments must exist on the leader node.

	# Stress test application
	# Usage: fluo-deploy run -a stress
	# Notes: - The stress test can take up to one hour to complete
	#        - Status will be output to the console
	#        - Logs are written to install/tests/stress/logs
	stress_repo = https://github.com/fluo-io/fluo-stress
	stress_branch = master
	stress_command = bin/run-test.sh

	# Phrasecount application
	# Usage: fluo-deploy run -a phrasecount --args <txtDocsDir>
	# Notes: - The directory <txtDocsDir> must exist on the leader node
	#        - The cluster cannot be set up with multiple zookeepers until FLUO-521 is resolved.
	phrasecount_repo = https://github.com/fluo-io/phrasecount
	phrasecount_branch = master
	phrasecount_command = bin/run.sh

	# Webindex application
	# Usage: fluo-deploy run -a webindex --args <args>
	# Notes: - If setting multiple args, wrap them in quotes
	#        - Set <args> to 'test-id load1 -fg' to load 1 Common Crawl file.
	webindex_repo = https://github.com/fluo-io/webindex
	webindex_branch = master
	webindex_command = bin/webindex

	[nodes]
	# Describes the nodes in the cluster, one per line, in the following format:
	# <Hostname> = <Service1>[,<Service2>,<Service3>]
	# Where:
	#   Hostname = Must be unique. Used as the hostname in EC2, or should match the hostname on your own cluster.
	#   Service  = Service to run on the node (possible values: zookeeper, namenode, resourcemanager,
	#              accumulomaster, worker, fluo, metrics).
	#              Every service must appear in this section except fluo and metrics, which are optional.
	leader1 = namenode,zookeeper,fluo
	leader2 = resourcemanager,zookeeper
	leader3 = accumulomaster,zookeeper
	metrics = metrics
	worker1 = worker
	worker2 = worker
	worker3 = worker