# Copyright 2014 Fluo authors (see AUTHORS)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
[general]
# Cluster user name (install command will SSH to cluster using this user)
# Leave default below if launching cluster in AWS
cluster.username = centos
# Cluster base directory where install/ & tarballs/ directories are created
# Leave default below if launching cluster in AWS
cluster.base.dir = /home/centos
# Hostname of the proxy node that fluo-deploy will use to direct installation of the cluster. It will
# be given a public IP if launching in EC2. If not launching in EC2, the node must have a public IP
# that can be reached from your machine. The hostname can be chosen from the "nodes" section below.
proxy.hostname = leader1
# If set, a SOCKS proxy will be created on the specified port when connecting to the proxy node using 'fluo-deploy ssh <cluster>'
#proxy.socks.port = 38585
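# For example, with the port above uncommented, 'fluo-deploy ssh <cluster>' would open a SOCKS proxy
# on localhost:38585. Pointing a browser's SOCKS v5 proxy at that port to reach web UIs on the
# cluster is one typical use (an assumption about usage, not something documented here).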
# Specifies if SSH, Hosts, and .bashrc should be configured on cluster
configure.cluster = true
# Apache mirror used to download Hadoop, Zookeeper, and Accumulo
apache.mirror = http://www.gtlib.gatech.edu/pub/apache
# Accumulo Instance name
accumulo.instance = instance16
# Accumulo Password
accumulo.password = secret
# Software versions
fluo.version = 1.0.0-beta-2-SNAPSHOT
hadoop.version = 2.7.0
zookeeper.version = 3.4.7
spark.version = 1.5.1-bin-hadoop2.6
influxdb.version = 0.9.4.2
grafana.version = 2.5.0
#If the Accumulo version contains SNAPSHOT, then the scripts expect its tarball to be in cluster/tarballs/ and will not try to download it from a mirror.
accumulo.version = 1.6.4
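# For example, if accumulo.version were set to a SNAPSHOT build, you would place the tarball your
# build produces under cluster/tarballs/ before installing; the path below is a hypothetical example:
# cluster/tarballs/accumulo-1.7.0-SNAPSHOT-bin.tar.gz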
# Software md5 checksums
hadoop.md5.hash = 79a6e87b09011861309c153a856c3ca1
zookeeper.md5.hash = 58b515d1c1352e135d17c9a9a9ffedd0
spark.md5.hash = 6b0830240dc9f18e3a1ab29994cc4d20
influxdb.md5.hash = 6abb3d5df9b69aeb9bae37d0889bf67a
grafana.md5.hash = e5fe934a27e94f954e87f5d18043a40e
#The Accumulo hash is only checked if the version is not a SNAPSHOT version.
accumulo.md5.hash = 530dfe488d5c2ee415b514ce72baa691
[ec2]
# AWS Access & Secret keys
aws.access.key = access_key
aws.secret.key = secret_key
# EC2 region to connect to
region = us-east-1
# Type of AWS instance launched by default
default.instance.type = m3.large
# Type of AWS instance launched for any node running 'worker' service
# Leave default below to use same instance type set by 'default.instance.type' property
worker.instance.type = %(default.instance.type)s
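# The %(...)s syntax above is Python ConfigParser interpolation, which substitutes the value of
# another property. To use a different type for workers, set the value directly, e.g. (a
# hypothetical choice): worker.instance.type = m3.xlarge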
# VPC to launch instances in (optional)
#vpc.id = vpc-xxxxx
# VPC Subnet to launch instances in (optional)
#subnet.id = subnet-xxxxxx
# Name of public key that will be loaded by Amazon on to your EC2 instances.
# You can upload and name your public key using the EC2 Management Console.
# Only the user with this key will be able to SSH to the cluster.
key.name = my_aws_key
# Tags to add to instances
#instance.tags = key1:value1,key2:value2
#Size in GB of the EBS root device. Set to 0 or leave unset to use the default. For some
#reason, setting this to a non-zero value causes some instance types to fail, so use with
#caution. See issue #59
ebs.root.size = 0
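#Example (if you accept the risk noted above; the size chosen here is arbitrary):
#ebs.root.size = 100 would request a 100 GB EBS root volume.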
[performance]
#Automatically tune Accumulo, Yarn, and Fluo performance settings by selecting or
#creating a performance profile. Try not to use more memory than each node has,
#and leave some space for the OS.
profile=perf-small
#Below are different performance profiles that can be selected. Each profile
#has the same properties with different values.
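#A custom profile can be added the same way (a hypothetical example): define a section such as
#[perf-custom] containing the same keys as the profiles below, then set profile=perf-custom above.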
[perf-small]
#amount of JVM heap for each tserver
accumulo.tserv.mem=2G
#amount of data cache for each tserver
accumulo.dcache.size=768M
#amount of index cache for each tserver
accumulo.icache.size=256M
#in memory map size for each tserver
accumulo.imap.size=512M
#amount of JVM heap for each Fluo worker
fluo.worker.mem.mb=2048
#Number of threads for each Fluo worker
fluo.worker.threads=20
#Number of workers to run per node
fluo.worker.instances.multiplier=1
#Max amount of memory for YARN per node
yarn.nm.mem.mb=4096
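#Sanity check (an assumption about how these settings interact): the Fluo workers on a node should
#fit within that node's YARN memory, i.e. fluo.worker.mem.mb * fluo.worker.instances.multiplier
#should not exceed yarn.nm.mem.mb; here 2048 * 1 = 2048 <= 4096, leaving headroom for other containers.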
[perf-medium]
accumulo.tserv.mem=3G
accumulo.dcache.size=1536M
accumulo.icache.size=512M
accumulo.imap.size=512M
fluo.worker.mem.mb=4096
fluo.worker.threads=64
fluo.worker.instances.multiplier=1
yarn.nm.mem.mb=8192
[perf-large]
accumulo.tserv.mem=4G
accumulo.dcache.size=2G
accumulo.icache.size=1G
accumulo.imap.size=512M
fluo.worker.mem.mb=4096
fluo.worker.threads=64
fluo.worker.instances.multiplier=2
yarn.nm.mem.mb=16384
[apps]
###
# This section is used to configure Fluo applications, which are identified by a unique application
# name (which replaces <app> below). Each application has the following configuration settings:
#
# <app>.repo                # Git repo containing the application's code. It is cloned when the app is run.
# <app>.branch              # Branch to check out in the git repo
# <app>.command.pre.init    # Command in the app repo to run before initializing and starting Fluo.
#                           # FLUO_HOME & FLUO_APP_NAME will be set in the env when run. This command
#                           # can do things like change Fluo config and copy observers.
# <app>.command.post.start  # Script in the app repo to run after Fluo is started
#
# Applications can be run using the command 'fluo-deploy run -a <app>'.
# Arguments can be added after <app> and will be passed to the configured commands for that app.
# Any files or directories referenced by arguments must exist on the leader node.
# Stress test application
# Usage: fluo-deploy run -a stress
# Notes: - Stress test can take up to one hour to complete
#        - Status will be output to console
#        - Logs are written to install/tests/stress/logs
stress.repo=https://github.com/fluo-io/fluo-stress
stress.branch=master
stress.command=bin/run-test.sh
# Phrasecount application
# Usage: fluo-deploy run -a phrasecount --args <txtDocsDir>
# Notes: - The directory <txtDocsDir> must exist on the leader node
#        - The cluster cannot be set up with multiple zookeepers until FLUO-521 is resolved.
phrasecount.repo=https://github.com/fluo-io/phrasecount
phrasecount.branch=master
phrasecount.command=bin/run.sh
# Webindex application
# Usage: fluo-deploy run -a webindex --args <args>
# Notes: - If setting multiple args, wrap them in quotes
#        - Set <args> to 'test-id load1 -fg' to load 1 Common Crawl file.
webindex.repo=https://github.com/fluo-io/webindex
webindex.branch=master
webindex.command=bin/webindex
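# To configure an additional application, follow the same pattern (a hypothetical example; the
# name 'myapp' and its repo URL are placeholders):
#myapp.repo=https://github.com/example/myapp
#myapp.branch=master
#myapp.command=bin/run.sh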
[nodes]
# Describes the nodes in the cluster using the following format:
# <Hostname> = <Service1>[,<Service2>,<Service3>]
# Where:
#   Hostname = Must be unique. It will be used as the hostname in EC2, or should match the hostname on your own cluster
#   Service  = Service to run on the node (possible values: zookeeper, namenode, resourcemanager,
#              accumulomaster, worker, fluo, metrics, dev)
# All services below are required, except for fluo, dev, & metrics, which are optional
leader1 = namenode,zookeeper,fluo,dev
leader2 = resourcemanager,zookeeper
leader3 = accumulomaster,zookeeper
metrics = metrics
worker1 = worker
worker2 = worker
worker3 = worker