Merge pull request #93 from mikewalch/fd-90

Closes #90 #92 - Add system monitoring to Grafana/InfluxDB
diff --git a/bin/impl/fluo_deploy/config.py b/bin/impl/fluo_deploy/config.py
index e2be299..d558b4f 100644
--- a/bin/impl/fluo_deploy/config.py
+++ b/bin/impl/fluo_deploy/config.py
@@ -312,6 +312,10 @@
   def proxy_private_ip(self):
     return self.get_private_ip(self.proxy_hostname())
 
+  def get_performance_prop(self, prop):
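+    # Read the selected profile name from [performance], then look up prop
+    # in that profile's section (e.g. [perf-small])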
+    profile = self.get('performance', 'profile')
+    return self.get(profile, prop)
+
   def print_all(self):
     print 'proxy.public.ip = ', self.proxy_public_ip()
     for (name, val) in self.items('general'):
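The new lookup is a two-step indirection: read the profile name from the
[performance] section, then read the requested property from that profile's
section. A minimal standalone sketch of the behavior, assuming a Python 2
ConfigParser-backed config and the example props file added below (the file
path is illustrative):

    from ConfigParser import ConfigParser

    config = ConfigParser()
    config.read('conf/fluo-deploy.props')

    def get_performance_prop(prop):
        profile = config.get('performance', 'profile')  # e.g. 'perf-small'
        return config.get(profile, prop)                # value from [perf-small]

    print get_performance_prop('accumulo.tserv.mem')    # prints '2G'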
diff --git a/bin/impl/fluo_deploy/main.py b/bin/impl/fluo_deploy/main.py
index cd75d3a..0b91ade 100644
--- a/bin/impl/fluo_deploy/main.py
+++ b/bin/impl/fluo_deploy/main.py
@@ -260,12 +260,20 @@
   sub_d["RESOURCEMANAGER_HOST"] = config.get_service_hostnames("resourcemanager")[0]
   sub_d["RESOURCEMANAGER_IP"] = config.get_service_private_ips("resourcemanager")[0]
   sub_d["ACCUMULOMASTER_HOST"] = config.get_service_hostnames("accumulomaster")[0]
-  sub_d["NUM_WORKERS"] = len(config.get_service_hostnames("worker"))
+  sub_d["NUM_WORKERS"] = str(int(len(config.get_service_hostnames("worker"))) * int(config.get_performance_prop("fluo.worker.instances.multiplier")))
   sub_d["DATANODE_DIRS"] = config.worker_ephemeral_dirs("/hadoop/data")
   sub_d["MAPRED_TEMP_DIRS"] = config.worker_ephemeral_dirs("/hadoop/mapred/temp")
   sub_d["MAPRED_LOCAL_DIRS"] = config.worker_ephemeral_dirs("/hadoop/mapred/local")
   sub_d["YARN_LOCAL_DIRS"] = config.worker_ephemeral_dirs("/hadoop/yarn/local")
 
+  sub_d["ACCUMULO_TSERV_MEM"]=config.get_performance_prop("accumulo.tserv.mem");
+  sub_d["ACCUMULO_DCACHE_SIZE"]=config.get_performance_prop("accumulo.dcache.size");
+  sub_d["ACCUMULO_ICACHE_SIZE"]=config.get_performance_prop("accumulo.icache.size");
+  sub_d["ACCUMULO_IMAP_SIZE"]=config.get_performance_prop("accumulo.imap.size");
+  sub_d["FLUO_WORKER_MEM_MB"]=config.get_performance_prop("fluo.worker.mem.mb");
+  sub_d["FLUO_WORKER_THREADS"]=config.get_performance_prop("fluo.worker.threads");
+  sub_d["YARN_NM_MEM_MB"]=config.get_performance_prop("yarn.nm.mem.mb");
+
   sub_d["SETUP_METRICS"] = "false"
   if config.has_service("metrics"):
     sub_d["SETUP_METRICS"] = "true"
diff --git a/cluster/templates/fluo-cluster/conf/accumulo-env.sh b/cluster/templates/fluo-cluster/conf/accumulo-env.sh
index 6d1dd80..0d6f011 100755
--- a/cluster/templates/fluo-cluster/conf/accumulo-env.sh
+++ b/cluster/templates/fluo-cluster/conf/accumulo-env.sh
@@ -23,7 +23,7 @@
 then
    POLICY="-Djava.security.manager -Djava.security.policy=$${ACCUMULO_CONF_DIR}/accumulo.policy"
 fi
-export ACCUMULO_TSERVER_OPTS="$${POLICY} -Xmx2G -Xms2G "
+export ACCUMULO_TSERVER_OPTS="$${POLICY} -Xmx$ACCUMULO_TSERV_MEM -Xms$ACCUMULO_TSERV_MEM"
 export ACCUMULO_MASTER_OPTS="$${POLICY} -Xmx256m -Xms256m"
 export ACCUMULO_MONITOR_OPTS="$${POLICY} -Xmx128m -Xms64m"
 export ACCUMULO_GC_OPTS="-Xmx128m -Xms128m"
diff --git a/cluster/templates/fluo-cluster/conf/accumulo-site.xml b/cluster/templates/fluo-cluster/conf/accumulo-site.xml
index 8c3b2a4..f4a7e48 100644
--- a/cluster/templates/fluo-cluster/conf/accumulo-site.xml
+++ b/cluster/templates/fluo-cluster/conf/accumulo-site.xml
@@ -28,23 +28,33 @@
   </property>
 
   <property>
+    <name>table.durability</name>
+    <value>flush</value>
+  </property>
+
+  <property>
     <name>tserver.server.threads.minimum</name>
     <value>64</value>
   </property>
 
   <property>
+    <name>tserver.readahead.concurrent.max</name>
+    <value>64</value>
+  </property>
+
+  <property>
     <name>tserver.cache.data.size</name>
-    <value>512M</value>
+    <value>$ACCUMULO_DCACHE_SIZE</value>
   </property>
 
   <property>
     <name>tserver.cache.index.size</name>
-    <value>512M</value>
+    <value>$ACCUMULO_ICACHE_SIZE</value>
   </property>
 
   <property>
     <name>tserver.memory.maps.max</name>
-    <value>1G</value>
+    <value>$ACCUMULO_IMAP_SIZE</value>
   </property>
 
   <property>
diff --git a/cluster/templates/fluo-cluster/conf/fluo.properties b/cluster/templates/fluo-cluster/conf/fluo.properties
index e34cf76..5770c96 100644
--- a/cluster/templates/fluo-cluster/conf/fluo.properties
+++ b/cluster/templates/fluo-cluster/conf/fluo.properties
@@ -76,9 +76,9 @@
 # Number of worker yarn instances
 io.fluo.worker.instances=$NUM_WORKERS
 # Number of threads in each worker instance
-#io.fluo.worker.num.threads=10
+io.fluo.worker.num.threads=$FLUO_WORKER_THREADS
 # Max memory of Worker yarn containers (in MB)
-#io.fluo.worker.max.memory.mb=1024
+io.fluo.worker.max.memory.mb=$FLUO_WORKER_MEM_MB
 # Number of worker virtual cores
 #io.fluo.worker.num.cores=1
 
diff --git a/cluster/templates/fluo-cluster/conf/yarn-site.xml b/cluster/templates/fluo-cluster/conf/yarn-site.xml
index 49ed111..06a2f9d 100644
--- a/cluster/templates/fluo-cluster/conf/yarn-site.xml
+++ b/cluster/templates/fluo-cluster/conf/yarn-site.xml
@@ -43,7 +43,7 @@
   </property>
   <property>
     <name>yarn.nodemanager.resource.memory-mb</name>
-    <value>8192</value>
+    <value>$YARN_NM_MEM_MB</value>
   </property>
   <property>
     <name>yarn.scheduler.minimum-allocation-mb</name>
diff --git a/conf/fluo-deploy.props.example b/conf/fluo-deploy.props.example
index eac3b38..31a8681 100644
--- a/conf/fluo-deploy.props.example
+++ b/conf/fluo-deploy.props.example
@@ -77,6 +77,53 @@
 #caution.  See issue #59
 ebs.root.size = 0
 
+[performance]
+#Automatically tune Accumulo, YARN, and Fluo performance settings by selecting
+#or creating a performance profile.  Do not allocate more memory than each node
+#has, and leave some space for the OS.
+profile=perf-small
+
+#Below are the performance profiles that can be selected.  Each profile sets
+#the same properties to different values.
+
+[perf-small]
+#Amount of JVM heap for each tserver
+accumulo.tserv.mem=2G
+#Amount of data cache for each tserver
+accumulo.dcache.size=768M
+#Amount of index cache for each tserver
+accumulo.icache.size=256M
+#In-memory map size for each tserver
+accumulo.imap.size=512M
+#Max memory (MB) for each Fluo worker
+fluo.worker.mem.mb=2048
+#Number of threads for each Fluo worker
+fluo.worker.threads=20
+#Number of Fluo worker instances to start per worker node
+fluo.worker.instances.multiplier=1
+#Max memory (MB) available to YARN containers per node
+yarn.nm.mem.mb=4096
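+#For example, this profile budgets roughly 2G of tserver heap (which holds the
+#768M data cache and 256M index cache), 512M for the in-memory map (off-heap
+#when native maps are enabled), and 4G for YARN containers per node.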
+
+[perf-medium]
+accumulo.tserv.mem=3G
+accumulo.dcache.size=1536M
+accumulo.icache.size=512M
+accumulo.imap.size=512M
+fluo.worker.mem.mb=4096
+fluo.worker.threads=64
+fluo.worker.instances.multiplier=1
+yarn.nm.mem.mb=8192
+
+[perf-large]
+accumulo.tserv.mem=4G
+accumulo.dcache.size=2G
+accumulo.icache.size=1G
+accumulo.imap.size=512M
+fluo.worker.mem.mb=4096
+fluo.worker.threads=64
+fluo.worker.instances.multiplier=2
+yarn.nm.mem.mb=16384
+
 [apps]
 ###
# This section is used to configure Fluo applications which are identified by a unique application