#!/bin/sh
#/*
# * Licensed to the Apache Software Foundation (ASF) under one
# * or more contributor license agreements. See the NOTICE file
# * distributed with this work for additional information
# * regarding copyright ownership. The ASF licenses this file
# * to you under the Apache License, Version 2.0 (the
# * "License"); you may not use this file except in compliance
# * with the License. You may obtain a copy of the License at
# *
# * http://www.apache.org/licenses/LICENSE-2.0
# *
# * Unless required by applicable law or agreed to in writing, software
# * distributed under the License is distributed on an "AS IS" BASIS,
# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# * See the License for the specific language governing permissions and
# * limitations under the License.
# */
# Work from the directory that contains the invoked script (${0}) so that the
# relative "source" paths below resolve whatever the caller's working directory
# is. The path is quoted so that directories containing whitespace work.
cd -- "$(dirname -- "${0}")" || {
    echo "${0}: unable to change to the script directory" >&2;
    # "return" succeeds when this file is being sourced; otherwise fall back
    # to "exit". Either way, do not go on to source files from the wrong place.
    return 1 2>/dev/null || exit 1;
};

# Slurp in all our user-customizable settings.
source ./gangliaEnv.sh;

# Get access to Ganglia-wide constants etc.
source ./gangliaLib.sh;

# Location of the gmond executable.
GMOND_BIN=/usr/sbin/gmond;

# Base names of the generated gmond configuration files and of the PID file;
# the getGmond*FileName helpers prepend the appropriate directory.
GMOND_CORE_CONF_FILE=gmond.core.conf;
GMOND_MASTER_CONF_FILE=gmond.master.conf;
GMOND_SLAVE_CONF_FILE=gmond.slave.conf;
GMOND_PID_FILE=gmond.pid;
# Functions.
# Print the path of the gmond core configuration file.
#
# Arguments:
#   $1 - optional cluster name. When non-empty, the path points into the
#        per-cluster sub-directory of ${GANGLIA_CONF_DIR}.
# Globals read:
#   GANGLIA_CONF_DIR, GMOND_CORE_CONF_FILE
# Outputs:
#   The path, followed by a newline, on stdout.
function getGmondCoreConfFileName()
{
    # "local" stops this helper from overwriting a caller's own ${clusterName}.
    local clusterName=${1};

    if [ -n "${clusterName}" ]
    then
        # ${clusterName} is not empty.
        printf '%s\n' "${GANGLIA_CONF_DIR}/${clusterName}/${GMOND_CORE_CONF_FILE}";
    else
        printf '%s\n' "${GANGLIA_CONF_DIR}/${GMOND_CORE_CONF_FILE}";
    fi
}
# Print the path of the gmond master configuration file.
#
# Arguments:
#   $1 - optional cluster name. When non-empty, the path points into the
#        conf.d directory of that cluster's sub-directory of
#        ${GANGLIA_CONF_DIR}; otherwise into ${GANGLIA_CONF_DIR}/conf.d.
# Globals read:
#   GANGLIA_CONF_DIR, GMOND_MASTER_CONF_FILE
# Outputs:
#   The path, followed by a newline, on stdout.
function getGmondMasterConfFileName()
{
    # "local" stops this helper from overwriting a caller's own ${clusterName}.
    local clusterName=${1};

    if [ -n "${clusterName}" ]
    then
        # ${clusterName} is not empty.
        printf '%s\n' "${GANGLIA_CONF_DIR}/${clusterName}/conf.d/${GMOND_MASTER_CONF_FILE}";
    else
        printf '%s\n' "${GANGLIA_CONF_DIR}/conf.d/${GMOND_MASTER_CONF_FILE}";
    fi
}
# Print the path of the gmond slave configuration file.
#
# Arguments:
#   $1 - optional cluster name. When non-empty, the path points into the
#        conf.d directory of that cluster's sub-directory of
#        ${GANGLIA_CONF_DIR}; otherwise into ${GANGLIA_CONF_DIR}/conf.d.
# Globals read:
#   GANGLIA_CONF_DIR, GMOND_SLAVE_CONF_FILE
# Outputs:
#   The path, followed by a newline, on stdout.
function getGmondSlaveConfFileName()
{
    # "local" stops this helper from overwriting a caller's own ${clusterName}.
    local clusterName=${1};

    if [ -n "${clusterName}" ]
    then
        # ${clusterName} is not empty.
        printf '%s\n' "${GANGLIA_CONF_DIR}/${clusterName}/conf.d/${GMOND_SLAVE_CONF_FILE}";
    else
        printf '%s\n' "${GANGLIA_CONF_DIR}/conf.d/${GMOND_SLAVE_CONF_FILE}";
    fi
}
# Print the path of the gmond PID file.
#
# Arguments:
#   $1 - optional cluster name. When non-empty, the path points into the
#        per-cluster sub-directory of ${GANGLIA_RUNTIME_DIR}.
# Globals read:
#   GANGLIA_RUNTIME_DIR, GMOND_PID_FILE
# Outputs:
#   The path, followed by a newline, on stdout.
function getGmondPidFileName()
{
    # "local" stops this helper from overwriting a caller's own ${clusterName}.
    local clusterName=${1};

    if [ -n "${clusterName}" ]
    then
        # ${clusterName} is not empty.
        printf '%s\n' "${GANGLIA_RUNTIME_DIR}/${clusterName}/${GMOND_PID_FILE}";
    else
        printf '%s\n' "${GANGLIA_RUNTIME_DIR}/${GMOND_PID_FILE}";
    fi
}
# Print the PID recorded in the gmond PID file, if that file exists.
#
# Arguments:
#   $1 - optional cluster name, passed through to getGmondPidFileName.
# Outputs:
#   The first whitespace-delimited field of the PID file's first line on
#   stdout (an empty line if the file is empty); nothing at all when the
#   PID file does not exist.
function getGmondLoggedPid()
{
    # Locals keep this helper from leaking state into the caller's scope.
    local gmondPidFile loggedPid rest;

    gmondPidFile=$(getGmondPidFileName "${1}");

    if [ -e "${gmondPidFile}" ]
    then
        # Read the value with the path quoted (so directories containing
        # whitespace work) and without letting the shell glob-expand the
        # file's content. "read" still fills ${loggedPid} when the file has
        # no trailing newline, so its exit status is deliberately ignored.
        read -r loggedPid rest < "${gmondPidFile}";
        printf '%s\n' "${loggedPid}";
    fi
}
# Print the logged gmond PID, but only if a process with that PID exists.
#
# Arguments:
#   $1 - optional cluster name, passed through to getGmondLoggedPid.
# Outputs:
#   When a PID is logged: that PID on stdout if "ps" reports such a process,
#   otherwise an empty line. When no PID is logged: nothing.
# NOTE(review): this only checks that the PID is alive, not that the process
# is actually gmond - a recycled PID would also match.
function getGmondRunningPid()
{
    # Locals keep this helper from leaking state into the caller's scope.
    local gmondLoggedPid gmondRunningPid;

    gmondLoggedPid=$(getGmondLoggedPid "${1}");

    if [ -n "${gmondLoggedPid}" ]
    then
        # "pid=" gives the column an empty header, so ps prints no header
        # line and emits output only when the process exists; awk strips the
        # column padding.
        gmondRunningPid=$(ps -o pid= -p "${gmondLoggedPid}" | awk 'NR == 1 { print $1 }');
        printf '%s\n' "${gmondRunningPid}";
    fi
}
# Write the gmond core configuration for a cluster to stdout.
#
# Arguments:
#   $1 - cluster name; handed to getGangliaClusterInfo to look up the cluster.
# Globals read:
#   GMOND_USER, GANGLIA_CONF_DIR (both interpolated into the generated text).
# Outputs:
#   The generated configuration on stdout.
# Returns:
#   1 if no cluster name was given,
#   2 if the cluster name, master IP or port could not be obtained from
#     getGangliaClusterInfo,
#   otherwise the exit status of the final "cat".
# NOTE(review): getGangliaClusterInfo is not defined in this file; it is
# assumed to print "<clusterName> <masterIP> <port>" on one line - confirm.
function generateGmondCoreConf()
{
    # Locals keep the lookup results and timestamp out of the caller's scope.
    local clusterName=${1};
    local gmondClusterName gmondMasterIP gmondPort now;

    if [ -z "${clusterName}" ]
    then
        return 1;
    fi

    # Only the first line of the helper's output is parsed. "-r" keeps any
    # backslashes literal, and the quotes preserve the output verbatim.
    read -r gmondClusterName gmondMasterIP gmondPort <<<"$(getGangliaClusterInfo "${clusterName}")";

    # Check that all of ${gmondClusterName} and ${gmondMasterIP} and ${gmondPort} are populated.
    # Separate tests avoid the ambiguous parsing of "[ ... -a ... ]".
    if [ -z "${gmondClusterName}" ] || [ -z "${gmondMasterIP}" ] || [ -z "${gmondPort}" ]
    then
        return 2;
    fi

    now=$(date);

    cat << END_OF_GMOND_CORE_CONF
#################### Generated by ${0} on ${now} ####################
#
/* This configuration is as close to 2.5.x default behavior as possible
The values closely match ./gmond/metric.h definitions in 2.5.x */
globals {
daemonize = yes
setuid = yes
user = ${GMOND_USER}
debug_level = 0
max_udp_msg_len = 1472
mute = no
deaf = no
allow_extra_data = yes
host_dmax = 0 /*secs */
host_tmax = 20 /*secs */
cleanup_threshold = 300 /*secs */
gexec = no
send_metadata_interval = 30 /*secs */
}
/*
* The cluster attributes specified will be used as part of the <CLUSTER>
* tag that will wrap all hosts collected by this instance.
*/
cluster {
name = "${gmondClusterName}"
owner = "unspecified"
latlong = "unspecified"
url = "unspecified"
}
/* The host section describes attributes of the host, like the location */
host {
location = "unspecified"
}
/* You can specify as many tcp_accept_channels as you like to share
* an XML description of the state of the cluster.
*
* At the very least, every gmond must expose its XML state to
* queriers from localhost.
*/
tcp_accept_channel {
bind = localhost
port = ${gmondPort}
}
/* Each metrics module that is referenced by gmond must be specified and
loaded. If the module has been statically linked with gmond, it does
not require a load path. However all dynamically loadable modules must
include a load path. */
modules {
module {
name = "core_metrics"
}
module {
name = "cpu_module"
path = "modcpu.so"
}
module {
name = "disk_module"
path = "moddisk.so"
}
module {
name = "load_module"
path = "modload.so"
}
module {
name = "mem_module"
path = "modmem.so"
}
module {
name = "net_module"
path = "modnet.so"
}
module {
name = "proc_module"
path = "modproc.so"
}
module {
name = "sys_module"
path = "modsys.so"
}
}
/* The old internal 2.5.x metric array has been replaced by the following
collection_group directives. What follows is the default behavior for
collecting and sending metrics that is as close to 2.5.x behavior as
possible. */
/* This collection group will cause a heartbeat (or beacon) to be sent every
20 seconds. In the heartbeat is the GMOND_STARTED data which expresses
the age of the running gmond. */
collection_group {
collect_once = yes
time_threshold = 20
metric {
name = "heartbeat"
}
}
/* This collection group will send general info about this host every
1200 secs.
This information doesn't change between reboots and is only collected
once. */
collection_group {
collect_once = yes
time_threshold = 1200
metric {
name = "cpu_num"
title = "CPU Count"
}
metric {
name = "cpu_speed"
title = "CPU Speed"
}
metric {
name = "mem_total"
title = "Memory Total"
}
/* Should this be here? Swap can be added/removed between reboots. */
metric {
name = "swap_total"
title = "Swap Space Total"
}
metric {
name = "boottime"
title = "Last Boot Time"
}
metric {
name = "machine_type"
title = "Machine Type"
}
metric {
name = "os_name"
title = "Operating System"
}
metric {
name = "os_release"
title = "Operating System Release"
}
metric {
name = "location"
title = "Location"
}
}
/* This collection group will send the status of gexecd for this host
every 300 secs.*/
/* Unlike 2.5.x the default behavior is to report gexecd OFF. */
collection_group {
collect_once = yes
time_threshold = 300
metric {
name = "gexec"
title = "Gexec Status"
}
}
/* This collection group will collect the CPU status info every 20 secs.
The time threshold is set to 90 seconds. In honesty, this
time_threshold could be set significantly higher to reduce
unneccessary network chatter. */
collection_group {
collect_every = 20
time_threshold = 90
/* CPU status */
metric {
name = "cpu_user"
value_threshold = "1.0"
title = "CPU User"
}
metric {
name = "cpu_system"
value_threshold = "1.0"
title = "CPU System"
}
metric {
name = "cpu_idle"
value_threshold = "5.0"
title = "CPU Idle"
}
metric {
name = "cpu_nice"
value_threshold = "1.0"
title = "CPU Nice"
}
metric {
name = "cpu_aidle"
value_threshold = "5.0"
title = "CPU aidle"
}
metric {
name = "cpu_wio"
value_threshold = "1.0"
title = "CPU wio"
}
/* The next two metrics are optional if you want more detail...
... since they are accounted for in cpu_system.
metric {
name = "cpu_intr"
value_threshold = "1.0"
title = "CPU intr"
}
metric {
name = "cpu_sintr"
value_threshold = "1.0"
title = "CPU sintr"
}
*/
}
collection_group {
collect_every = 20
time_threshold = 90
/* Load Averages */
metric {
name = "load_one"
value_threshold = "1.0"
title = "One Minute Load Average"
}
metric {
name = "load_five"
value_threshold = "1.0"
title = "Five Minute Load Average"
}
metric {
name = "load_fifteen"
value_threshold = "1.0"
title = "Fifteen Minute Load Average"
}
}
/* This group collects the number of running and total processes */
collection_group {
collect_every = 80
time_threshold = 950
metric {
name = "proc_run"
value_threshold = "1.0"
title = "Total Running Processes"
}
metric {
name = "proc_total"
value_threshold = "1.0"
title = "Total Processes"
}
}
/* This collection group grabs the volatile memory metrics every 40 secs and
sends them at least every 180 secs. This time_threshold can be increased
significantly to reduce unneeded network traffic. */
collection_group {
collect_every = 40
time_threshold = 180
metric {
name = "mem_free"
value_threshold = "1024.0"
title = "Free Memory"
}
metric {
name = "mem_shared"
value_threshold = "1024.0"
title = "Shared Memory"
}
metric {
name = "mem_buffers"
value_threshold = "1024.0"
title = "Memory Buffers"
}
metric {
name = "mem_cached"
value_threshold = "1024.0"
title = "Cached Memory"
}
metric {
name = "swap_free"
value_threshold = "1024.0"
title = "Free Swap Space"
}
}
collection_group {
collect_every = 40
time_threshold = 300
metric {
name = "bytes_out"
value_threshold = 4096
title = "Bytes Sent"
}
metric {
name = "bytes_in"
value_threshold = 4096
title = "Bytes Received"
}
metric {
name = "pkts_in"
value_threshold = 256
title = "Packets Received"
}
metric {
name = "pkts_out"
value_threshold = 256
title = "Packets Sent"
}
}
/* Different than 2.5.x default since the old config made no sense */
collection_group {
collect_every = 1800
time_threshold = 3600
metric {
name = "disk_total"
value_threshold = 1.0
title = "Total Disk Space"
}
}
collection_group {
collect_every = 40
time_threshold = 180
metric {
name = "disk_free"
value_threshold = 1.0
title = "Disk Space Available"
}
metric {
name = "part_max_used"
value_threshold = 1.0
title = "Maximum Disk Space Used"
}
}
include ("${GANGLIA_CONF_DIR}/${gmondClusterName}/conf.d/*.conf")
END_OF_GMOND_CORE_CONF
}
# Write the gmond master configuration for a cluster to stdout: a
# udp_recv_channel and a tcp_accept_channel, both bound to the cluster's
# master IP and port.
#
# Arguments:
#   $1 - cluster name; handed to getGangliaClusterInfo to look up the cluster.
# Outputs:
#   The generated configuration on stdout.
# Returns:
#   1 if no cluster name was given,
#   2 if the cluster name, master IP or port could not be obtained from
#     getGangliaClusterInfo,
#   otherwise the exit status of the final "cat".
# NOTE(review): getGangliaClusterInfo is not defined in this file; it is
# assumed to print "<clusterName> <masterIP> <port>" on one line - confirm.
function generateGmondMasterConf
{
    # Locals keep the lookup results and timestamp out of the caller's scope.
    local clusterName=${1};
    local gmondClusterName gmondMasterIP gmondPort now;

    if [ -z "${clusterName}" ]
    then
        return 1;
    fi

    # Only the first line of the helper's output is parsed. "-r" keeps any
    # backslashes literal, and the quotes preserve the output verbatim.
    read -r gmondClusterName gmondMasterIP gmondPort <<<"$(getGangliaClusterInfo "${clusterName}")";

    # Check that all of ${gmondClusterName} and ${gmondMasterIP} and ${gmondPort} are populated.
    # Separate tests avoid the ambiguous parsing of "[ ... -a ... ]".
    if [ -z "${gmondClusterName}" ] || [ -z "${gmondMasterIP}" ] || [ -z "${gmondPort}" ]
    then
        return 2;
    fi

    now=$(date);

    cat << END_OF_GMOND_MASTER_CONF
#################### Generated by ${0} on ${now} ####################
/* Masters only receive; they never send. */
udp_recv_channel {
bind = ${gmondMasterIP}
port = ${gmondPort}
}
/* The gmond cluster master must additionally provide an XML
* description of the cluster to the gmetad that will query it.
*/
tcp_accept_channel {
bind = ${gmondMasterIP}
port = ${gmondPort}
}
END_OF_GMOND_MASTER_CONF
}
# Write the gmond slave configuration for a cluster to stdout: a single
# udp_send_channel pointing at the cluster's master IP and port.
#
# Arguments:
#   $1 - cluster name; handed to getGangliaClusterInfo to look up the cluster.
# Outputs:
#   The generated configuration on stdout.
# Returns:
#   1 if no cluster name was given,
#   2 if the cluster name, master IP or port could not be obtained from
#     getGangliaClusterInfo,
#   otherwise the exit status of the final "cat".
# NOTE(review): getGangliaClusterInfo is not defined in this file; it is
# assumed to print "<clusterName> <masterIP> <port>" on one line - confirm.
function generateGmondSlaveConf
{
    # Locals keep the lookup results and timestamp out of the caller's scope.
    local clusterName=${1};
    local gmondClusterName gmondMasterIP gmondPort now;

    if [ -z "${clusterName}" ]
    then
        return 1;
    fi

    # Only the first line of the helper's output is parsed. "-r" keeps any
    # backslashes literal, and the quotes preserve the output verbatim.
    read -r gmondClusterName gmondMasterIP gmondPort <<<"$(getGangliaClusterInfo "${clusterName}")";

    # Check that all of ${gmondClusterName} and ${gmondMasterIP} and ${gmondPort} are populated.
    # Separate tests avoid the ambiguous parsing of "[ ... -a ... ]".
    if [ -z "${gmondClusterName}" ] || [ -z "${gmondMasterIP}" ] || [ -z "${gmondPort}" ]
    then
        return 2;
    fi

    now=$(date);

    cat << END_OF_GMOND_SLAVE_CONF
#################### Generated by ${0} on ${now} ####################
/* Slaves only send; they never receive. */
udp_send_channel {
#bind_hostname = yes # Highly recommended, soon to be default.
# This option tells gmond to use a source address
# that resolves to the machine's hostname. Without
# this, the metrics may appear to come from any
# interface and the DNS names associated with
# those IPs will be used to create the RRDs.
host = ${gmondMasterIP}
port = ${gmondPort}
ttl = 1
}
END_OF_GMOND_SLAVE_CONF
}