| # Copyright 2014 Fluo authors (see AUTHORS) |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| [general] |
| # Type of cloud service (ec2 or baremetal); see the [ec2] and [baremetal] sections below |
| cloud_provider = ec2 |
| # Cluster user name (install command will SSH to cluster using this user) |
| # Leave default below if launching cluster in AWS |
| cluster_user = centos |
| # Cluster base directory where install/ & tarballs/ directories are created |
| # Leave default below if launching cluster in AWS |
| cluster_basedir = /home/centos |
| # Hostname of proxy node that fluo-deploy will use to direct installation of cluster. Will be given |
| # public IP if launching in EC2. If not launching in EC2, node must have public IP that can be reached |
| # from your machine. Hostname can be chosen from "nodes" section below. |
| proxy_hostname = leader1 |
| # Cluster network interface (leave default if launching in AWS) |
| network_interface=eth0 |
| # If set, a SOCKS proxy will be created on the specified port when connecting to proxy using 'fluo-deploy ssh <cluster>' |
| #proxy_socks_port = 38585 |
| # Apache mirror to download Hadoop, Zookeeper, Accumulo |
| apache_mirror = http://www.gtlib.gatech.edu/pub/apache |
| # Accumulo instance name |
| accumulo_instance = instance16 |
| # Accumulo password |
| # NOTE(review): stored in plain text; 'secret' looks like a placeholder - confirm it is changed for real clusters |
| accumulo_password = secret |
| # Software versions |
| fluo_version = 1.0.0-beta-2 |
| hadoop_version = 2.6.3 |
| zookeeper_version = 3.4.6 |
| spark_version = 1.5.1 |
| # NOTE(review): spark_bin_version repeats spark_version plus a Hadoop build suffix - presumably the two must be kept in sync; confirm |
| spark_bin_version = 1.5.1-bin-hadoop2.6 |
| maven_version = 3.2.5 |
| accumulo_version = 1.6.4 |
| # Software sha256 checksums |
| # NOTE(review): presumably each checksum must match the release selected by the version properties above - confirm |
| hadoop_sha256 = ada83d8c2ff72d4665ca2d70ce77af79bd57265beb3ce253cd2869b507e32152 |
| zookeeper_sha256 = 01b3938547cd620dc4c93efe07c0360411f4a66962a70500b163b59014046994 |
| spark_sha256 = 41ab59b28581b7952e3b0cfd8182980f033d2bf22d0f6a088ee6d120ddf24953 |
| maven_sha256 = 8c190264bdf591ff9f1268dc0ad940a2726f9e958e367716a09b8aaa7e74a755 |
| # Accumulo checksum is only checked if not a SNAPSHOT version. |
| accumulo_sha256 = 6853be86dbf60a0e1efb1431e78a99fc1fc98c9a8fa1feae655def2ee33b30ff |
| |
| [ec2] |
| # AWS Access & Secret keys |
| # NOTE(review): 'access_key' / 'secret_key' look like placeholders to be replaced with real credentials - confirm |
| aws_access_key = access_key |
| aws_secret_key = secret_key |
| # EC2 region to connect to |
| region = us-east-1 |
| # Type of AWS instance launched by default |
| default_instance_type = m3.large |
| # Type of AWS instance launched for any node running 'worker' service |
| # Leave default below to use same instance type set by 'default_instance_type' property |
| # The %(default_instance_type)s value refers to the 'default_instance_type' property above |
| # NOTE(review): this looks like Python configparser-style interpolation - confirm the consuming parser |
| worker_instance_type = %(default_instance_type)s |
| # VPC to launch instances in (optional) |
| #vpc_id = vpc-xxxxx |
| # VPC Subnet to launch instances in (optional) |
| #subnet_id = subnet-xxxxxx |
| # Name of public key that will be loaded by Amazon on to your EC2 instances. |
| # You can upload and name your public key using the EC2 Management Console. |
| # Only the user with this key will be able to SSH to the cluster. |
| key_name = my_aws_key |
| # Type of filesystem to format instance storage as. |
| fstype = ext3 |
| # Force formatting of instance devices, even when they have an existing filesystem. |
| force_format = no |
| # Tags to add to instances (comma-separated key:value pairs, as in the example below) |
| #instance_tags = key1:value1,key2:value2 |
| |
| |
| [baremetal] |
| # NOTE(review): this section had no documentation. The descriptions below are inferred from the |
| # key names and values only and should be confirmed against the code that reads them. |
| # Presumably these settings apply when cloud_provider = baremetal in the [general] section. |
| # Presumably the common root/prefix of the data mount points listed in 'mounts' - confirm |
| mount_root = /var/disk |
| # Comma-separated list of mount points (three entries here) |
| mounts = /var/disk01,/var/disk02,/var/disk03 |
| # Comma-separated list of block devices (three entries here; presumably matched to 'mounts' by position - confirm) |
| devices = /dev/hdb1,/dev/hdc1,/dev/hdd1 |
| # Comma-separated list of drive identifiers (presumably used by the metrics service, one per mount - confirm) |
| metrics_drive_ids = var-disk01,var-disk02,var-disk03 |
| |
| [performance] |
| # Automatically tune Accumulo, Yarn, and Fluo performance settings by selecting or |
| # creating a performance profile. Try not to use more memory than each node has |
| # and leave some space for the OS. |
| # The value names one of the profile sections defined below (e.g. perf-small). |
| profile=perf-small |
| |
| #Below are different performance profiles that can be selected. Each profile |
| #has the same properties with different values. |
| |
| [perf-small] |
| # Amount of JVM heap for each tserver |
| accumulo_tserv_mem=2G |
| # Amount of data cache for each tserver |
| accumulo_dcache_size=768M |
| # Amount of index cache for each tserver |
| accumulo_icache_size=256M |
| # In-memory map size for each tserver |
| accumulo_imap_size=512M |
| # Amount of JVM heap for each Fluo worker (NOTE(review): presumably in MB, per the _mb suffix - confirm) |
| fluo_worker_mem_mb=2048 |
| # Number of threads for each Fluo worker |
| fluo_worker_threads=20 |
| # Number of workers to run per node |
| fluo_worker_instances_multiplier=1 |
| # Max amount of memory for YARN per node (NOTE(review): presumably in MB, per the _mb suffix - confirm) |
| yarn_nm_mem_mb=4096 |
| |
| [perf-medium] |
| # Same properties as [perf-small] with different values; see that section for a description of each property. |
| accumulo_tserv_mem=3G |
| accumulo_dcache_size=1536M |
| accumulo_icache_size=512M |
| accumulo_imap_size=512M |
| fluo_worker_mem_mb=4096 |
| fluo_worker_threads=64 |
| fluo_worker_instances_multiplier=1 |
| yarn_nm_mem_mb=8192 |
| |
| [perf-large] |
| # Same properties as [perf-small] with different values; see that section for a description of each property. |
| accumulo_tserv_mem=4G |
| accumulo_dcache_size=2G |
| accumulo_icache_size=1G |
| accumulo_imap_size=512M |
| fluo_worker_mem_mb=4096 |
| fluo_worker_threads=64 |
| fluo_worker_instances_multiplier=2 |
| yarn_nm_mem_mb=16384 |
| |
| [apps] |
| ### |
| # This section is used to configure Fluo applications which are identified by a unique application |
| # name (which replaces <app> below). Each application has the following configuration settings: |
| # |
| # <app>_repo # Git repo containing code for the application. It is cloned when the application is run. |
| # <app>_branch # Branch to checkout in git repo |
| # <app>_command # Script in repo that starts the application |
| # |
| # Applications can be run using the following command: 'fluo-deploy run -a <app>' |
| # Arguments can be added after <app> and will be passed to configured commands for that app. |
| # NOTE(review): the usage examples below pass arguments via '--args' - confirm the exact syntax. |
| # Any files or directories referenced by arguments must exist on leader node. |
| |
| # Stress test application |
| # Usage: fluo-deploy run -a stress |
| # Notes: - Stress test can take up to one hour to complete |
| # - Status will be output to console |
| # - Logs are written to install/tests/stress/logs |
| stress_repo=https://github.com/fluo-io/fluo-stress |
| stress_branch=master |
| stress_command=bin/run-test.sh |
| |
| # Phrasecount application |
| # Usage: fluo-deploy run -a phrasecount --args <txtDocsDir> |
| # Notes: - The directory <txtDocsDir> must exist on leader node |
| # - Cluster cannot be set up with multiple zookeepers until FLUO-521 is resolved. |
| phrasecount_repo=https://github.com/fluo-io/phrasecount |
| phrasecount_branch=master |
| phrasecount_command=bin/run.sh |
| |
| # Webindex application |
| # Usage: fluo-deploy run -a webindex --args <args> |
| # Notes: - If setting multiple args, wrap them in quotes |
| # - Set <args> to 'test-id load1 -fg' to load 1 Common Crawl file. |
| webindex_repo=https://github.com/fluo-io/webindex |
| webindex_branch=master |
| webindex_command=bin/webindex |
| |
| [nodes] |
| # Describes nodes in cluster in the following format: |
| # <Hostname> = <Service1>[,<Service2>,<Service3>] |
| # Where: |
| # Hostname = Must be unique. Will be used for hostname in EC2 or should match hostname on your own cluster |
| # Service = Service to run on node (possible values: zookeeper, namenode, resourcemanager, accumulomaster, worker, fluo, metrics) |
| # All of the services listed above are required, except for fluo & metrics which are optional. |
| # The proxy_hostname property in the [general] section refers to one of the hostnames listed here. |
| leader1 = namenode,zookeeper,fluo |
| leader2 = resourcemanager,zookeeper |
| leader3 = accumulomaster,zookeeper |
| metrics = metrics |
| worker1 = worker |
| worker2 = worker |
| worker3 = worker |