# Copyright 2014 Fluo authors (see AUTHORS)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
[general]
# Type of cloud service hosting the cluster (ec2 or baremetal)
cloud_provider = ec2
# Cluster user name (install command will SSH to cluster using this user)
# Leave default below if launching cluster in AWS
cluster_user = centos
# Cluster base directory where install/ & tarballs/ directories are created
# Leave default below if launching cluster in AWS
cluster_basedir = /home/centos
# Hostname of proxy node that fluo-deploy will use to direct installation of cluster. Will be given
# public IP if launching in EC2. If not launching in EC2, node must have public IP that can be reached
# from your machine. Hostname can be chosen from "nodes" section below.
proxy_hostname = leader1
# Cluster network interface (leave default if launching in AWS)
network_interface = eth0
# If set, a SOCKS proxy will be created on the specified port when connecting to proxy using 'fluo-deploy ssh <cluster>'
#proxy_socks_port = 38585
# Apache mirror to download Hadoop, Zookeeper, Accumulo
apache_mirror = http://www.gtlib.gatech.edu/pub/apache
# Accumulo instance name
accumulo_instance = instance16
# Accumulo password
# NOTE(review): the password is stored in plain text; change the example value below
# and avoid committing the edited file.
accumulo_password = secret
# Software versions
fluo_version = 1.0.0-beta-2
hadoop_version = 2.6.3
zookeeper_version = 3.4.6
spark_version = 1.5.1
spark_bin_version = 1.5.1-bin-hadoop2.6
maven_version = 3.2.5
accumulo_version = 1.6.4
# Software sha256 checksums
# NOTE(review): presumably each checksum must be updated together with the matching version above - confirm.
hadoop_sha256 = ada83d8c2ff72d4665ca2d70ce77af79bd57265beb3ce253cd2869b507e32152
zookeeper_sha256 = 01b3938547cd620dc4c93efe07c0360411f4a66962a70500b163b59014046994
spark_sha256 = 41ab59b28581b7952e3b0cfd8182980f033d2bf22d0f6a088ee6d120ddf24953
maven_sha256 = 8c190264bdf591ff9f1268dc0ad940a2726f9e958e367716a09b8aaa7e74a755
# Accumulo checksum is only checked if not a SNAPSHOT version.
accumulo_sha256 = 6853be86dbf60a0e1efb1431e78a99fc1fc98c9a8fa1feae655def2ee33b30ff
[ec2]
# AWS Access & Secret keys
# NOTE(review): credentials are stored here in plain text; replace the example values below
# and avoid committing the edited file.
aws_access_key = access_key
aws_secret_key = secret_key
# EC2 region to connect to
region = us-east-1
# Type of AWS instance launched by default
default_instance_type = m3.large
# Type of AWS instance launched for any node running 'worker' service
# Leave default below to use same instance type set by 'default_instance_type' property
# NOTE(review): the default value is an interpolation reference to that property; this
# presumably relies on Python ConfigParser-style interpolation - confirm the parser in use.
worker_instance_type = %(default_instance_type)s
# VPC to launch instances in (optional)
#vpc_id = vpc-xxxxx
# VPC Subnet to launch instances in (optional)
#subnet_id = subnet-xxxxxx
# Name of public key that will be loaded by Amazon on to your EC2 instances.
# You can upload and name your public key using the EC2 Management Console.
# Only the user with this key will be able to SSH to the cluster.
key_name = my_aws_key
# Type of filesystem to format instance storage as.
fstype = ext3
# Force formatting of instance devices, even when they have an existing filesystem.
force_format = no
# Tags to add to instances (optional; comma-separated key:value pairs)
#instance_tags = key1:value1,key2:value2
[baremetal]
# NOTE(review): these settings presumably apply only when cloud_provider (in the "general"
# section) is set to baremetal - confirm.
# Root path of the data disk mounts - TODO confirm exact use
mount_root = /var/disk
# Comma-separated list of mount points
mounts = /var/disk01,/var/disk02,/var/disk03
# Comma-separated list of devices
# NOTE(review): presumably listed in the same order as 'mounts' (one device per mount) - confirm.
devices = /dev/hdb1,/dev/hdc1,/dev/hdd1
# Comma-separated list of drive ids used for metrics. The example values are the mount
# paths above with the leading '/' dropped and the remaining '/' replaced by '-'.
metrics_drive_ids = var-disk01,var-disk02,var-disk03
[performance]
# Automatically tune Accumulo, YARN, and Fluo performance settings by selecting or
# creating a performance profile. Try not to use more memory than each node has
# and leave some space for the OS.
profile = perf-small
# Below are different performance profiles that can be selected. Each profile
# has the same properties with different values.
[perf-small]
# JVM heap size for each tserver
accumulo_tserv_mem = 2G
# Data cache size for each tserver
accumulo_dcache_size = 768M
# Index cache size for each tserver
accumulo_icache_size = 256M
# In-memory map size for each tserver
accumulo_imap_size = 512M
# JVM heap size (in MB) for each Fluo worker
fluo_worker_mem_mb = 2048
# Number of threads for each Fluo worker
fluo_worker_threads = 20
# Number of workers to run per node
fluo_worker_instances_multiplier = 1
# Maximum amount of memory (in MB) for YARN per node
yarn_nm_mem_mb = 4096
[perf-medium]
# Same properties as the perf-small profile; see that section for a description of each.
accumulo_tserv_mem = 3G
accumulo_dcache_size = 1536M
accumulo_icache_size = 512M
accumulo_imap_size = 512M
fluo_worker_mem_mb = 4096
fluo_worker_threads = 64
fluo_worker_instances_multiplier = 1
yarn_nm_mem_mb = 8192
[perf-large]
# Same properties as the perf-small profile; see that section for a description of each.
accumulo_tserv_mem = 4G
accumulo_dcache_size = 2G
accumulo_icache_size = 1G
accumulo_imap_size = 512M
fluo_worker_mem_mb = 4096
fluo_worker_threads = 64
fluo_worker_instances_multiplier = 2
yarn_nm_mem_mb = 16384
[apps]
###
# This section is used to configure Fluo applications, each of which is identified by a unique
# application name (which replaces <app> below). Each application has the following settings:
#
#   <app>_repo     # Git repo containing code for the application. It is cloned when the application is run.
#   <app>_branch   # Branch to check out in the Git repo
#   <app>_command  # Script in the repo that starts the application
#
# Applications can be run using the following command: 'fluo-deploy run -a <app>'
# Arguments can be added after <app> and will be passed to the configured command for that app.
# Any files or directories referenced by arguments must exist on the leader node.
###
# Stress test application
# Usage: fluo-deploy run -a stress
# Notes: - Stress test can take up to one hour to complete
#        - Status will be output to console
#        - Logs are written to install/tests/stress/logs
stress_repo = https://github.com/fluo-io/fluo-stress
stress_branch = master
stress_command = bin/run-test.sh
# Phrasecount application
# Usage: fluo-deploy run -a phrasecount --args <txtDocsDir>
# Notes: - The directory <txtDocsDir> must exist on the leader node
#        - Cluster cannot be set up with multiple zookeepers until FLUO-521 is resolved.
phrasecount_repo = https://github.com/fluo-io/phrasecount
phrasecount_branch = master
phrasecount_command = bin/run.sh
# Webindex application
# Usage: fluo-deploy run -a webindex --args <args>
# Notes: - If setting multiple args, wrap them in quotes
#        - Set <args> to 'test-id load1 -fg' to load 1 Common Crawl file.
webindex_repo = https://github.com/fluo-io/webindex
webindex_branch = master
webindex_command = bin/webindex
[nodes]
# Describes the nodes in the cluster, one per line, in the following format:
#   <Hostname> = <Service1>[,<Service2>,<Service3>]
# Where:
#   Hostname = Must be unique. Will be used for hostname in EC2 or should match hostname on your own cluster
#   Service  = Service to run on node (possible values: zookeeper, namenode, resourcemanager, accumulomaster, worker, fluo, metrics)
# All services are required below except for fluo & metrics, which are optional.
# NOTE: 'proxy_hostname' in the "general" section is chosen from the hostnames listed here
# (leader1 in this example).
leader1 = namenode,zookeeper,fluo
leader2 = resourcemanager,zookeeper
leader3 = accumulomaster,zookeeper
metrics = metrics
worker1 = worker
worker2 = worker
worker3 = worker