| #!/bin/sh |
| |
| #/* |
| # * Licensed to the Apache Software Foundation (ASF) under one |
| # * or more contributor license agreements. See the NOTICE file |
| # * distributed with this work for additional information |
| # * regarding copyright ownership. The ASF licenses this file |
| # * to you under the Apache License, Version 2.0 (the |
| # * "License"); you may not use this file except in compliance |
| # * with the License. You may obtain a copy of the License at |
| # * |
| # * http://www.apache.org/licenses/LICENSE-2.0 |
| # * |
| # * Unless required by applicable law or agreed to in writing, software |
| # * distributed under the License is distributed on an "AS IS" BASIS, |
| # * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # * See the License for the specific language governing permissions and |
| # * limitations under the License. |
| # */ |
| |
# Work from the directory that holds this script so the relative "./" paths
# below resolve. The expansion is quoted so directories containing whitespace
# still work, and "--" stops dirname from parsing a ${0} that begins with "-"
# (for example "-bash" in a login shell) as an option.
cd "$(dirname -- "${0}")"

# Slurp in all our user-customizable settings.
source ./gangliaEnv.sh

# Get access to Ganglia-wide constants etc.
source ./gangliaLib.sh

# Path of the gmond executable.
GMOND_BIN=/usr/sbin/gmond

# Base file names; the get*FileName helpers in this file prepend the
# configuration / runtime directory (and optional per-cluster subdirectory).
GMOND_CORE_CONF_FILE=gmond.core.conf
GMOND_MASTER_CONF_FILE=gmond.master.conf
GMOND_SLAVE_CONF_FILE=gmond.slave.conf
GMOND_PID_FILE=gmond.pid
| |
| # Functions. |
# Print the path of the core gmond configuration file on stdout.
#
# Globals read: GANGLIA_CONF_DIR, GMOND_CORE_CONF_FILE
# Arguments:    $1 - cluster name (optional). When non-empty the file is placed
#                    in the ${GANGLIA_CONF_DIR}/<cluster name>/ subdirectory,
#                    otherwise directly in ${GANGLIA_CONF_DIR}.
getGmondCoreConfFileName() {
  # Kept local so that a direct (non-subshell) call cannot overwrite a caller's
  # own clusterName variable; ${1:-} also keeps the function usable under
  # `set -u` when no argument is passed.
  local clusterName=${1:-}

  if [ -n "${clusterName}" ]
  then
    # ${clusterName} is not empty.
    printf '%s\n' "${GANGLIA_CONF_DIR}/${clusterName}/${GMOND_CORE_CONF_FILE}"
  else
    printf '%s\n' "${GANGLIA_CONF_DIR}/${GMOND_CORE_CONF_FILE}"
  fi
}
| |
# Print the path of the master-specific gmond configuration file on stdout.
#
# Globals read: GANGLIA_CONF_DIR, GMOND_MASTER_CONF_FILE
# Arguments:    $1 - cluster name (optional). When non-empty the file is placed
#                    in ${GANGLIA_CONF_DIR}/<cluster name>/conf.d/, otherwise
#                    in ${GANGLIA_CONF_DIR}/conf.d/.
getGmondMasterConfFileName() {
  # Kept local so that a direct (non-subshell) call cannot overwrite a caller's
  # own clusterName variable; ${1:-} also keeps the function usable under
  # `set -u` when no argument is passed.
  local clusterName=${1:-}

  if [ -n "${clusterName}" ]
  then
    # ${clusterName} is not empty.
    printf '%s\n' "${GANGLIA_CONF_DIR}/${clusterName}/conf.d/${GMOND_MASTER_CONF_FILE}"
  else
    printf '%s\n' "${GANGLIA_CONF_DIR}/conf.d/${GMOND_MASTER_CONF_FILE}"
  fi
}
| |
# Print the path of the slave-specific gmond configuration file on stdout.
#
# Globals read: GANGLIA_CONF_DIR, GMOND_SLAVE_CONF_FILE
# Arguments:    $1 - cluster name (optional). When non-empty the file is placed
#                    in ${GANGLIA_CONF_DIR}/<cluster name>/conf.d/, otherwise
#                    in ${GANGLIA_CONF_DIR}/conf.d/.
getGmondSlaveConfFileName() {
  # Kept local so that a direct (non-subshell) call cannot overwrite a caller's
  # own clusterName variable; ${1:-} also keeps the function usable under
  # `set -u` when no argument is passed.
  local clusterName=${1:-}

  if [ -n "${clusterName}" ]
  then
    # ${clusterName} is not empty.
    printf '%s\n' "${GANGLIA_CONF_DIR}/${clusterName}/conf.d/${GMOND_SLAVE_CONF_FILE}"
  else
    printf '%s\n' "${GANGLIA_CONF_DIR}/conf.d/${GMOND_SLAVE_CONF_FILE}"
  fi
}
| |
# Print the path of the gmond PID file on stdout.
#
# Globals read: GANGLIA_RUNTIME_DIR, GMOND_PID_FILE
# Arguments:    $1 - cluster name (optional). When non-empty the file is placed
#                    in ${GANGLIA_RUNTIME_DIR}/<cluster name>/, otherwise
#                    directly in ${GANGLIA_RUNTIME_DIR}.
getGmondPidFileName() {
  # Kept local so that a direct (non-subshell) call cannot overwrite a caller's
  # own clusterName variable; ${1:-} also keeps the function usable under
  # `set -u` when no argument is passed.
  local clusterName=${1:-}

  if [ -n "${clusterName}" ]
  then
    # ${clusterName} is not empty.
    printf '%s\n' "${GANGLIA_RUNTIME_DIR}/${clusterName}/${GMOND_PID_FILE}"
  else
    printf '%s\n' "${GANGLIA_RUNTIME_DIR}/${GMOND_PID_FILE}"
  fi
}
| |
# Print the PID recorded in the gmond PID file, if that file exists.
#
# Arguments: $1 - cluster name (optional), forwarded to getGmondPidFileName.
# Outputs:   the whitespace-separated words of the PID file joined by single
#            spaces on one line (an empty line for an empty file); nothing at
#            all when the PID file does not exist.
getGmondLoggedPid() {
  local pidFile
  local -a pidWords=()
  # Pin IFS so both the splitting and the joining below are independent of
  # whatever IFS the caller happens to have set.
  local IFS=$' \t\n'

  # Quoted so cluster names / runtime directories with whitespace survive.
  pidFile=$(getGmondPidFileName "${1:-}")

  if [ -e "${pidFile}" ]
  then
    # With a NUL delimiter `read` consumes the whole file and splits it on
    # whitespace, which normalises the content without any glob expansion.
    # It reports non-zero on reaching end-of-file even though the words were
    # stored, hence the `|| true`.
    read -r -d '' -a pidWords < "${pidFile}" || true
    printf '%s\n' "${pidWords[*]}"
  fi
}
| |
# Print the logged gmond PID if, and only if, a process with that PID exists.
#
# Arguments: $1 - cluster name (optional), forwarded to getGmondLoggedPid.
# Outputs:   the PID on one line when `ps` finds the process; an empty line
#            when a PID is logged but no such process exists; nothing at all
#            when no PID is logged.
# NOTE(review): only the existence of the PID is checked, not that the
# process is actually gmond.
getGmondRunningPid() {
  local loggedPid runningPid

  loggedPid=$(getGmondLoggedPid "${1:-}")

  if [ -n "${loggedPid}" ]
  then
    # "pid=" gives the column an empty header, so ps prints no header line and
    # the output is either the matching PID or nothing. awk strips the padding
    # ps puts around the number. `|| true` keeps a "not found" result from
    # being treated as a failure when the caller runs with errexit/pipefail.
    runningPid=$(ps -o pid= -p "${loggedPid}" | tail -1 | awk '{print $1}') || true
    printf '%s\n' "${runningPid}"
  fi
}
| |
# Write the core gmond configuration for one cluster to stdout.
#
# Arguments:    $1 - cluster name; passed to getGangliaClusterInfo.
# Globals read: GMOND_USER, GANGLIA_CONF_DIR
# Globals set:  clusterName, gmondClusterName, gmondMasterIP, gmondPort, now
#               (assigned without `local`, so they stay visible to the caller).
# Returns:      1 when no cluster name was given;
#               2 when the cluster lookup did not populate all three of
#                 name, master IP and port;
#               otherwise the exit status of the final `cat`.
generateGmondCoreConf() {
  clusterName=${1}

  # Nothing can be generated without a cluster name.
  [ -n "${clusterName}" ] || return 1

  # The lookup output is split on whitespace into name, master IP and port.
  read gmondClusterName gmondMasterIP gmondPort <<<$(getGangliaClusterInfo ${clusterName})

  # Bail out unless ${gmondClusterName}, ${gmondMasterIP} and ${gmondPort} are all populated.
  if [ -z "${gmondClusterName}" ] || [ -z "${gmondMasterIP}" ] || [ -z "${gmondPort}" ]
  then
    return 2
  fi

  # Timestamp embedded in the banner line of the generated file.
  now=$(date)

  # The heredoc delimiter is unquoted on purpose: ${0}, ${now}, ${GMOND_USER},
  # ${gmondClusterName}, ${gmondPort} and ${GANGLIA_CONF_DIR} are expanded.
  cat << END_OF_GMOND_CORE_CONF
#################### Generated by ${0} on ${now} ####################
#
/* This configuration is as close to 2.5.x default behavior as possible
The values closely match ./gmond/metric.h definitions in 2.5.x */
globals {
daemonize = yes
setuid = yes
user = ${GMOND_USER}
debug_level = 0
max_udp_msg_len = 1472
mute = no
deaf = no
allow_extra_data = yes
host_dmax = 0 /*secs */
host_tmax = 20 /*secs */
cleanup_threshold = 300 /*secs */
gexec = no
send_metadata_interval = 30 /*secs */
}

/*
* The cluster attributes specified will be used as part of the <CLUSTER>
* tag that will wrap all hosts collected by this instance.
*/
cluster {
name = "${gmondClusterName}"
owner = "unspecified"
latlong = "unspecified"
url = "unspecified"
}

/* The host section describes attributes of the host, like the location */
host {
location = "unspecified"
}

/* You can specify as many tcp_accept_channels as you like to share
* an XML description of the state of the cluster.
*
* At the very least, every gmond must expose its XML state to
* queriers from localhost.
*/
tcp_accept_channel {
bind = localhost
port = ${gmondPort}
}

/* Each metrics module that is referenced by gmond must be specified and
loaded. If the module has been statically linked with gmond, it does
not require a load path. However all dynamically loadable modules must
include a load path. */
modules {
module {
name = "core_metrics"
}
module {
name = "cpu_module"
path = "modcpu.so"
}
module {
name = "disk_module"
path = "moddisk.so"
}
module {
name = "load_module"
path = "modload.so"
}
module {
name = "mem_module"
path = "modmem.so"
}
module {
name = "net_module"
path = "modnet.so"
}
module {
name = "proc_module"
path = "modproc.so"
}
module {
name = "sys_module"
path = "modsys.so"
}
}

/* The old internal 2.5.x metric array has been replaced by the following
collection_group directives. What follows is the default behavior for
collecting and sending metrics that is as close to 2.5.x behavior as
possible. */

/* This collection group will cause a heartbeat (or beacon) to be sent every
20 seconds. In the heartbeat is the GMOND_STARTED data which expresses
the age of the running gmond. */
collection_group {
collect_once = yes
time_threshold = 20
metric {
name = "heartbeat"
}
}

/* This collection group will send general info about this host total memory every
180 secs.
This information doesn't change between reboots and is only collected
once. This information needed for heatmap showing */
collection_group {
collect_once = yes
time_threshold = 180
metric {
name = "mem_total"
title = "Memory Total"
}
}

/* This collection group will send general info about this host every
1200 secs.
This information doesn't change between reboots and is only collected
once. */
collection_group {
collect_once = yes
time_threshold = 1200
metric {
name = "cpu_num"
title = "CPU Count"
}
metric {
name = "cpu_speed"
title = "CPU Speed"
}
/* Should this be here? Swap can be added/removed between reboots. */
metric {
name = "swap_total"
title = "Swap Space Total"
}
metric {
name = "boottime"
title = "Last Boot Time"
}
metric {
name = "machine_type"
title = "Machine Type"
}
metric {
name = "os_name"
title = "Operating System"
}
metric {
name = "os_release"
title = "Operating System Release"
}
metric {
name = "location"
title = "Location"
}
}

/* This collection group will send the status of gexecd for this host
every 300 secs.*/
/* Unlike 2.5.x the default behavior is to report gexecd OFF. */
collection_group {
collect_once = yes
time_threshold = 300
metric {
name = "gexec"
title = "Gexec Status"
}
}

/* This collection group will collect the CPU status info every 20 secs.
The time threshold is set to 90 seconds. In honesty, this
time_threshold could be set significantly higher to reduce
unneccessary network chatter. */
collection_group {
collect_every = 20
time_threshold = 90
/* CPU status */
metric {
name = "cpu_user"
value_threshold = "1.0"
title = "CPU User"
}
metric {
name = "cpu_system"
value_threshold = "1.0"
title = "CPU System"
}
metric {
name = "cpu_idle"
value_threshold = "5.0"
title = "CPU Idle"
}
metric {
name = "cpu_nice"
value_threshold = "1.0"
title = "CPU Nice"
}
metric {
name = "cpu_aidle"
value_threshold = "5.0"
title = "CPU aidle"
}
metric {
name = "cpu_wio"
value_threshold = "1.0"
title = "CPU wio"
}
/* The next two metrics are optional if you want more detail...
... since they are accounted for in cpu_system.
metric {
name = "cpu_intr"
value_threshold = "1.0"
title = "CPU intr"
}
metric {
name = "cpu_sintr"
value_threshold = "1.0"
title = "CPU sintr"
}
*/
}

collection_group {
collect_every = 20
time_threshold = 90
/* Load Averages */
metric {
name = "load_one"
value_threshold = "1.0"
title = "One Minute Load Average"
}
metric {
name = "load_five"
value_threshold = "1.0"
title = "Five Minute Load Average"
}
metric {
name = "load_fifteen"
value_threshold = "1.0"
title = "Fifteen Minute Load Average"
}
}

/* This group collects the number of running and total processes */
collection_group {
collect_every = 80
time_threshold = 950
metric {
name = "proc_run"
value_threshold = "1.0"
title = "Total Running Processes"
}
metric {
name = "proc_total"
value_threshold = "1.0"
title = "Total Processes"
}
}

/* This collection group grabs the volatile memory metrics every 40 secs and
sends them at least every 180 secs. This time_threshold can be increased
significantly to reduce unneeded network traffic. */
collection_group {
collect_every = 40
time_threshold = 180
metric {
name = "mem_free"
value_threshold = "1024.0"
title = "Free Memory"
}
metric {
name = "mem_shared"
value_threshold = "1024.0"
title = "Shared Memory"
}
metric {
name = "mem_buffers"
value_threshold = "1024.0"
title = "Memory Buffers"
}
metric {
name = "mem_cached"
value_threshold = "1024.0"
title = "Cached Memory"
}
metric {
name = "swap_free"
value_threshold = "1024.0"
title = "Free Swap Space"
}
}

collection_group {
collect_every = 40
time_threshold = 300
metric {
name = "bytes_out"
value_threshold = 4096
title = "Bytes Sent"
}
metric {
name = "bytes_in"
value_threshold = 4096
title = "Bytes Received"
}
metric {
name = "pkts_in"
value_threshold = 256
title = "Packets Received"
}
metric {
name = "pkts_out"
value_threshold = 256
title = "Packets Sent"
}
}


collection_group {
collect_every = 40
time_threshold = 180
metric {
name = "disk_free"
value_threshold = 1.0
title = "Disk Space Available"
}
metric {
name = "part_max_used"
value_threshold = 1.0
title = "Maximum Disk Space Used"
}
metric {
name = "disk_total"
value_threshold = 1.0
title = "Total Disk Space"
}
}

include ("${GANGLIA_CONF_DIR}/${gmondClusterName}/conf.d/*.conf")
END_OF_GMOND_CORE_CONF
}
| |
# Write the master-only gmond configuration fragment for one cluster to stdout:
# a udp_recv_channel and a tcp_accept_channel, both bound to the cluster's
# master IP and port.
#
# Arguments:    $1 - cluster name; passed to getGangliaClusterInfo.
# Globals set:  clusterName, gmondClusterName, gmondMasterIP, gmondPort, now
#               (assigned without `local`, so they stay visible to the caller).
# Returns:      1 when no cluster name was given;
#               2 when the cluster lookup did not populate all three of
#                 name, master IP and port;
#               otherwise the exit status of the final `cat`.
generateGmondMasterConf() {
  clusterName=${1}

  # Nothing can be generated without a cluster name.
  [ -n "${clusterName}" ] || return 1

  # The lookup output is split on whitespace into name, master IP and port.
  read gmondClusterName gmondMasterIP gmondPort <<<$(getGangliaClusterInfo ${clusterName})

  # Bail out unless ${gmondClusterName}, ${gmondMasterIP} and ${gmondPort} are all populated.
  if [ -z "${gmondClusterName}" ] || [ -z "${gmondMasterIP}" ] || [ -z "${gmondPort}" ]
  then
    return 2
  fi

  # Timestamp embedded in the banner line of the generated file.
  now=$(date)

  # Unquoted delimiter: ${0}, ${now}, ${gmondMasterIP} and ${gmondPort} are expanded.
  cat << END_OF_GMOND_MASTER_CONF
#################### Generated by ${0} on ${now} ####################
/* Masters only receive; they never send. */
udp_recv_channel {
bind = ${gmondMasterIP}
port = ${gmondPort}
}

/* The gmond cluster master must additionally provide an XML
* description of the cluster to the gmetad that will query it.
*/
tcp_accept_channel {
bind = ${gmondMasterIP}
port = ${gmondPort}
}
END_OF_GMOND_MASTER_CONF
}
| |
# Write the slave-only gmond configuration fragment for one cluster to stdout:
# a single udp_send_channel aimed at the cluster's master IP and port.
#
# Arguments:    $1 - cluster name; passed to getGangliaClusterInfo.
# Globals set:  clusterName, gmondClusterName, gmondMasterIP, gmondPort, now
#               (assigned without `local`, so they stay visible to the caller).
# Returns:      1 when no cluster name was given;
#               2 when the cluster lookup did not populate all three of
#                 name, master IP and port;
#               otherwise the exit status of the final `cat`.
generateGmondSlaveConf() {
  clusterName=${1}

  # Nothing can be generated without a cluster name.
  [ -n "${clusterName}" ] || return 1

  # The lookup output is split on whitespace into name, master IP and port.
  read gmondClusterName gmondMasterIP gmondPort <<<$(getGangliaClusterInfo ${clusterName})

  # Bail out unless ${gmondClusterName}, ${gmondMasterIP} and ${gmondPort} are all populated.
  if [ -z "${gmondClusterName}" ] || [ -z "${gmondMasterIP}" ] || [ -z "${gmondPort}" ]
  then
    return 2
  fi

  # Timestamp embedded in the banner line of the generated file.
  now=$(date)

  # Unquoted delimiter: ${0}, ${now}, ${gmondMasterIP} and ${gmondPort} are expanded.
  cat << END_OF_GMOND_SLAVE_CONF
#################### Generated by ${0} on ${now} ####################
/* Slaves only send; they never receive. */
udp_send_channel {
#bind_hostname = yes # Highly recommended, soon to be default.
# This option tells gmond to use a source address
# that resolves to the machine's hostname. Without
# this, the metrics may appear to come from any
# interface and the DNS names associated with
# those IPs will be used to create the RRDs.
host = ${gmondMasterIP}
port = ${gmondPort}
ttl = 1
}
END_OF_GMOND_SLAVE_CONF
}