blob: b77e77bd7dc1c9fc2a3a8de8d134e039424460b8 [file] [log] [blame]
{#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#}
#
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#
{# TODO: Look for { or } in created file #}
# NAGIOS SERVER Check (status log update)
{% if hostgroup_defs['nagios-server'] %}
{# Base template for every Hadoop service check in this file. "register 0"
   marks it as a template only; the concrete checks inherit it through
   "use hadoop-service". Inline comments use ";" because Nagios treats "#"
   as a comment marker only at the start of a line. #}
define service {
        name                            hadoop-service
        use                             generic-service
        notification_options            w,u,c,r,f,s
        first_notification_delay        0
        notification_interval           0       ; Send the notification once
        contact_groups                  admins
        notifications_enabled           1
        event_handler_enabled           1
        register                        0
}
{# Runs check_nagios against /var/nagios/status.dat on the nagios-server
   hostgroup; nagios_lookup_daemon_str is passed through as the last argument.
   NOTE(review): the first argument (10) is presumably the maximum accepted age
   of the status file; confirm against the check_nagios command definition. #}
define service {
hostgroup_name nagios-server
use hadoop-service
service_description NAGIOS::Nagios status log freshness
servicegroups NAGIOS
check_command check_nagios!10!/var/nagios/status.dat!{{nagios_lookup_daemon_str}}
normal_check_interval 5
retry_check_interval 0.5
max_check_attempts 2
}
# NAGIOS SERVER HDFS Checks
{# Cluster-wide roll-ups attached to the nagios-server hostgroup. The quoted
   first argument of check_aggregate is the service_description of the per-host
   DataNode check defined for the 'slaves' hostgroup in this file, so the two
   strings have to stay identical.
   NOTE(review): 10% / 30% are presumably the warning / critical thresholds;
   confirm against the check_aggregate command definition. #}
define service {
hostgroup_name nagios-server
use hadoop-service
service_description HDFS::Percent DataNodes with space available
servicegroups HDFS
check_command check_aggregate!"DATANODE::DataNode space"!10%!30%
normal_check_interval 2
retry_check_interval 1
max_check_attempts 1
}
define service {
hostgroup_name nagios-server
use hadoop-service
service_description HDFS::Percent DataNodes live
servicegroups HDFS
check_command check_aggregate!"DATANODE::DataNode process"!10%!30%
normal_check_interval 0.5
retry_check_interval 0.25
max_check_attempts 3
}
{# used only for HDP2 #}
{# Emitted only when a namenode hostgroup exists and dfs_ha_enabled is truthy.
   $HOSTGROUPMEMBERS:namenode$ is a Nagios macro: it is left untouched by this
   template and expanded by Nagios when the check runs. #}
{% if hostgroup_defs['namenode'] and dfs_ha_enabled %}
define service {
hostgroup_name nagios-server
use hadoop-service
service_description HDFS::NameNode HA Healthy
servicegroups HDFS
check_command check_namenodes_ha!$HOSTGROUPMEMBERS:namenode$!{{ namenode_port }}
normal_check_interval 0.5
retry_check_interval 0.25
max_check_attempts 5
}
{% endif %}
# AMBARI AGENT Checks
{# One service per entry of all_hosts. all_ping_ports is indexed in step with
   all_hosts, so the zero-based loop counter selects the port for this host. #}
{% for hostname in all_hosts %}
define service {
        host_name               {{ hostname }}
        use                     hadoop-service
        service_description     AMBARI::Ambari Agent process
        servicegroups           AMBARI
        check_command           check_tcp_wrapper!{{ all_ping_ports[loop.index0] }}!-w 1 -c 1
        normal_check_interval   1
        retry_check_interval    0.25
        max_check_attempts      4
}
{% endfor %}
# NAGIOS SERVER ZOOKEEPER Checks
{# Roll-up attached to the nagios-server hostgroup, emitted only when a
   zookeeper-servers hostgroup exists. The quoted argument is the
   service_description of the per-host ZooKeeper check defined for the
   zookeeper-servers hostgroup in this file; keep the two strings identical. #}
{% if hostgroup_defs['zookeeper-servers'] %}
define service {
hostgroup_name nagios-server
use hadoop-service
service_description ZOOKEEPER::Percent ZooKeeper Servers live
servicegroups ZOOKEEPER
check_command check_aggregate!"ZOOKEEPER::ZooKeeper Server process"!35%!70%
normal_check_interval 0.5
retry_check_interval 0.25
max_check_attempts 3
}
{% endif %}
# NAGIOS SERVER HBASE Checks
{# Roll-up over the per-host "REGIONSERVER::RegionServer process" check defined
   later in this file.
   NOTE(review): the guard tests the hbasemasters hostgroup although the check
   aggregates RegionServers; confirm that a region-servers group always exists
   whenever hbasemasters does. #}
{% if hostgroup_defs['hbasemasters'] %}
define service {
hostgroup_name nagios-server
use hadoop-service
service_description HBASE::Percent RegionServers live
servicegroups HBASE
check_command check_aggregate!"REGIONSERVER::RegionServer process"!10%!30%
normal_check_interval 0.5
retry_check_interval 0.25
max_check_attempts 3
}
{% endif %}
{% endif %}
# GANGLIA SERVER Checks
{# Everything up to the matching endif is emitted only when a ganglia-server
   hostgroup exists. The first service probes ganglia_port on the Ganglia
   server hosts. Each following section is guarded by its own hostgroup and
   loops over that group's hosts, creating one TCP check per host against the
   component-specific ganglia_collector_*_port. #}
{% if hostgroup_defs['ganglia-server'] %}
define service {
hostgroup_name ganglia-server
use hadoop-service
service_description GANGLIA::Ganglia Server process
servicegroups GANGLIA
check_command check_tcp_wrapper!{{ ganglia_port }}!-w 1 -c 1
normal_check_interval 0.25
retry_check_interval 0.25
max_check_attempts 4
}
{# Monitor port for NameNode hosts #}
{% if hostgroup_defs['namenode'] %}
{% for hostname in hostgroup_defs['namenode'] %}
define service {
host_name {{ hostname }}
use hadoop-service
service_description GANGLIA::Ganglia Monitor process for NameNode
servicegroups GANGLIA
check_command check_tcp_wrapper!{{ ganglia_collector_namenode_port }}!-w 1 -c 1
normal_check_interval 0.25
retry_check_interval 0.25
max_check_attempts 4
}
{% endfor %}
{% endif %}
{# Monitor port for JobTracker hosts #}
{% if hostgroup_defs['jobtracker'] %}
{% for hostname in hostgroup_defs['jobtracker'] %}
define service {
host_name {{ hostname }}
use hadoop-service
service_description GANGLIA::Ganglia Monitor process for JobTracker
servicegroups GANGLIA
check_command check_tcp_wrapper!{{ ganglia_collector_jobtracker_port }}!-w 1 -c 1
normal_check_interval 0.25
retry_check_interval 0.25
max_check_attempts 4
}
{% endfor %}
{% endif %}
{# Monitor port for HBase Master hosts #}
{% if hostgroup_defs['hbasemasters'] %}
{% for hostname in hostgroup_defs['hbasemasters'] %}
define service {
host_name {{ hostname }}
use hadoop-service
service_description GANGLIA::Ganglia Monitor process for HBase Master
servicegroups GANGLIA
check_command check_tcp_wrapper!{{ ganglia_collector_hbase_port }}!-w 1 -c 1
normal_check_interval 0.25
retry_check_interval 0.25
max_check_attempts 4
}
{% endfor %}
{% endif %}
{# Monitor port for ResourceManager hosts #}
{% if hostgroup_defs['resourcemanager'] %}
{% for hostname in hostgroup_defs['resourcemanager'] %}
define service {
host_name {{ hostname }}
use hadoop-service
service_description GANGLIA::Ganglia Monitor process for ResourceManager
servicegroups GANGLIA
check_command check_tcp_wrapper!{{ ganglia_collector_rm_port }}!-w 1 -c 1
normal_check_interval 0.25
retry_check_interval 0.25
max_check_attempts 4
}
{% endfor %}
{% endif %}
{# Monitor port for HistoryServer hosts #}
{% if hostgroup_defs['historyserver2'] %}
{% for hostname in hostgroup_defs['historyserver2'] %}
define service {
host_name {{ hostname }}
use hadoop-service
service_description GANGLIA::Ganglia Monitor process for HistoryServer
servicegroups GANGLIA
check_command check_tcp_wrapper!{{ ganglia_collector_hs_port }}!-w 1 -c 1
normal_check_interval 0.25
retry_check_interval 0.25
max_check_attempts 4
}
{% endfor %}
{% endif %}
{% endif %}
{# TCP check of snamenode_port on the snamenode hostgroup; emitted only when
   that hostgroup exists. #}
{% if hostgroup_defs['snamenode'] %}
# Secondary namenode checks
define service {
hostgroup_name snamenode
use hadoop-service
service_description NAMENODE::Secondary NameNode process
servicegroups HDFS
check_command check_tcp_wrapper!{{ snamenode_port }}!-w 1 -c 1
normal_check_interval 0.5
retry_check_interval 0.25
max_check_attempts 3
}
{% endif %}
{# Two checks for the storm_ui hostgroup, each wrapped in its own (identical)
   guard: a web UI check whose description embeds the first host of the group,
   followed by a plain TCP check of storm_ui_port. #}
{% if hostgroup_defs['storm_ui'] %}
# STORM UI Checks
define service {
hostgroup_name storm_ui
use hadoop-service
service_description STORM_UI_SERVER::Storm UI on {{ hostgroup_defs['storm_ui'][0] }}
servicegroups STORM
check_command check_webui!storm_ui!{{ storm_ui_port }}
normal_check_interval 1
retry_check_interval 1
max_check_attempts 3
}
{% endif %}
{% if hostgroup_defs['storm_ui'] %}
# STORM UI process check
define service {
hostgroup_name storm_ui
use hadoop-service
service_description STORM_UI_SERVER::Storm UI Server process
servicegroups STORM
check_command check_tcp_wrapper!{{ storm_ui_port }}!-w 1 -c 1
normal_check_interval 0.5
retry_check_interval 0.25
max_check_attempts 3
}
{% endif %}
{# Per-daemon Storm process checks. Each section is emitted only when its
   hostgroup exists and probes that daemon's port with check_tcp_wrapper. #}
{% if hostgroup_defs['nimbus'] %}
# Nimbus Checks
define service {
hostgroup_name nimbus
use hadoop-service
service_description NIMBUS::Nimbus process
servicegroups STORM
check_command check_tcp_wrapper!{{ nimbus_port }}!-w 1 -c 1
normal_check_interval 0.5
retry_check_interval 0.25
max_check_attempts 3
}
{% endif %}
{% if hostgroup_defs['drpc-server'] %}
# DRPC Checks
define service {
hostgroup_name drpc-server
use hadoop-service
service_description DRPC_SERVER::DRPC Server process
servicegroups STORM
check_command check_tcp_wrapper!{{ drpc_port }}!-w 1 -c 1
normal_check_interval 0.5
retry_check_interval 0.25
max_check_attempts 3
}
{% endif %}
{% if hostgroup_defs['storm_rest_api'] %}
# Storm REST API Checks
define service {
hostgroup_name storm_rest_api
use hadoop-service
service_description STORM_REST_API::Storm REST API Server process
servicegroups STORM
check_command check_tcp_wrapper!{{ storm_rest_api_port }}!-w 1 -c 1
normal_check_interval 0.5
retry_check_interval 0.25
max_check_attempts 3
}
{% endif %}
# NAGIOS SERVER Supervisor Checks
{# Emitted only when a supervisors hostgroup exists. The first service is a
   roll-up on the Nagios server over the per-host check defined right after it;
   the quoted check_aggregate argument and the second service_description must
   therefore stay identical. #}
{% if hostgroup_defs['supervisors'] %}
define service {
hostgroup_name nagios-server
use hadoop-service
service_description SUPERVISOR::Percent Supervisors live
servicegroups STORM
check_command check_aggregate!"SUPERVISOR::Supervisors process"!10%!30%
normal_check_interval 0.5
retry_check_interval 0.25
max_check_attempts 3
}
define service {
hostgroup_name supervisors
use hadoop-service
service_description SUPERVISOR::Supervisors process
servicegroups STORM
check_command check_tcp_wrapper!{{ supervisor_port }}!-w 1 -c 1
normal_check_interval 1
retry_check_interval 0.5
max_check_attempts 3
}
{% endif %}
{# NameNode and HDFS checks, emitted only when a namenode hostgroup exists.
   The loop creates one set of services per entry of namenode_host, with the
   host name appended to each service_description. After the loop, two
   cluster-level checks run on the Nagios server and receive the NameNode
   hosts through the $HOSTGROUPMEMBERS:namenode$ Nagios macro.
   str(value).lower() renders the value's string form in lowercase before it
   is handed to the check command. #}
{% if hostgroup_defs['namenode'] %}
# HDFS Checks
{% for namenode_hostname in namenode_host %}
{# TODO: check if we can get rid of str, lower #}
define service {
host_name {{ namenode_hostname }}
use hadoop-service
service_description NAMENODE::NameNode edit logs directory status on {{ namenode_hostname }}
servicegroups HDFS
check_command check_name_dir_status!{{ namenode_port }}!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
normal_check_interval 0.5
retry_check_interval 0.5
max_check_attempts 3
}
{# The CPU check is left out when the OS family is "suse" #}
{% if env.system.os_family != "suse" %}
define service {
host_name {{ namenode_hostname }}
use hadoop-service
service_description NAMENODE::NameNode host CPU utilization on {{ namenode_hostname }}
servicegroups HDFS
check_command check_cpu!200%!250%
normal_check_interval 5
retry_check_interval 2
max_check_attempts 5
}
{% endif %}
define service {
host_name {{ namenode_hostname }}
use hadoop-service
service_description NAMENODE::NameNode Web UI on {{ namenode_hostname }}
servicegroups HDFS
check_command check_webui!namenode!{{ namenode_port }}
normal_check_interval 1
retry_check_interval 1
max_check_attempts 3
}
{# The process check looks its port up per host in nn_ha_host_port_map instead
   of using namenode_port #}
define service {
host_name {{ namenode_hostname }}
use hadoop-service
service_description NAMENODE::NameNode process on {{ namenode_hostname }}
servicegroups HDFS
check_command check_tcp_wrapper!{{nn_ha_host_port_map[namenode_hostname]}}!-w 1 -c 1
normal_check_interval 0.5
retry_check_interval 0.25
max_check_attempts 3
}
define service {
host_name {{ namenode_hostname }}
use hadoop-service
service_description HDFS::NameNode RPC latency on {{ namenode_hostname }}
servicegroups HDFS
check_command check_rpcq_latency!NameNode!{{ namenode_port }}!3000!5000!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
normal_check_interval 5
retry_check_interval 1
max_check_attempts 5
}
{% endfor %}
{# Cluster-level HDFS checks, executed on the Nagios server #}
define service {
hostgroup_name nagios-server
use hadoop-service
service_description HDFS::Blocks health
servicegroups HDFS
check_command check_hdfs_blocks!$HOSTGROUPMEMBERS:namenode$!{{ namenode_port }}!0%!0%!{{ nn_metrics_property }}!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
normal_check_interval 2
retry_check_interval 1
max_check_attempts 1
}
define service {
hostgroup_name nagios-server
use hadoop-service
service_description HDFS::HDFS capacity utilization
servicegroups HDFS
check_command check_hdfs_capacity!$HOSTGROUPMEMBERS:namenode$!{{ namenode_port }}!80%!90%!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
normal_check_interval 2
retry_check_interval 1
max_check_attempts 1
}
{% endif %}
# MAPREDUCE Checks
{# On HDP1 the JobTracker and TaskTracker alerts are defined here #}
{# ResourceManager checks on the resourcemanager hostgroup: web UI, CPU
   (left out when the OS family is "suse"), RPC latency and a TCP process
   check. All of them use rm_port except the CPU check. #}
{% if hostgroup_defs['resourcemanager'] %}
# YARN::RESOURCEMANAGER Checks
define service {
hostgroup_name resourcemanager
use hadoop-service
service_description RESOURCEMANAGER::ResourceManager Web UI
servicegroups YARN
check_command check_webui!resourcemanager!{{ rm_port }}
normal_check_interval 1
retry_check_interval 1
max_check_attempts 3
}
{% if env.system.os_family != "suse" %}
define service {
hostgroup_name resourcemanager
use hadoop-service
service_description RESOURCEMANAGER::ResourceManager CPU utilization
servicegroups YARN
check_command check_cpu!200%!250%
normal_check_interval 5
retry_check_interval 2
max_check_attempts 5
}
{% endif %}
define service {
hostgroup_name resourcemanager
use hadoop-service
service_description RESOURCEMANAGER::ResourceManager RPC latency
servicegroups YARN
check_command check_rpcq_latency!ResourceManager!{{ rm_port }}!3000!5000!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
normal_check_interval 5
retry_check_interval 1
max_check_attempts 5
}
define service {
hostgroup_name resourcemanager
use hadoop-service
service_description RESOURCEMANAGER::ResourceManager process
servicegroups YARN
check_command check_tcp_wrapper!{{ rm_port }}!-w 1 -c 1
normal_check_interval 1
retry_check_interval 0.5
max_check_attempts 3
}
{% endif %}
{# NodeManager checks, emitted only when a nodemanagers hostgroup exists: a
   TCP process check and a health check on every NodeManager host, plus a
   roll-up on the Nagios server whose quoted argument is the
   service_description of the process check.
   Note that check_nodemanager_health receives security_enabled before
   hadoop_ssl_enabled, the reverse of the order used by the other commands in
   this file. #}
{% if hostgroup_defs['nodemanagers'] %}
# YARN::NODEMANAGER Checks
define service {
hostgroup_name nodemanagers
use hadoop-service
service_description NODEMANAGER::NodeManager process
servicegroups YARN
check_command check_tcp_wrapper!{{ nm_port }}!-w 1 -c 1
normal_check_interval 1
retry_check_interval 0.5
max_check_attempts 3
}
define service {
hostgroup_name nodemanagers
use hadoop-service
service_description NODEMANAGER::NodeManager health
servicegroups YARN
check_command check_nodemanager_health!{{ nm_port }}!{{ str(security_enabled).lower() }}!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}
normal_check_interval 1
retry_check_interval 1
max_check_attempts 3
}
define service {
hostgroup_name nagios-server
use hadoop-service
service_description NODEMANAGER::Percent NodeManagers live
servicegroups YARN
check_command check_aggregate!"NODEMANAGER::NodeManager process"!10%!30%
normal_check_interval 0.5
retry_check_interval 0.25
max_check_attempts 3
}
{% endif %}
{# HistoryServer checks on the historyserver2 hostgroup: web UI, CPU (left out
   when the OS family is "suse"), RPC latency and a TCP process check. All of
   them use hs_port except the CPU check. #}
{% if hostgroup_defs['historyserver2'] %}
# MAPREDUCE::JOBHISTORY Checks
define service {
hostgroup_name historyserver2
use hadoop-service
service_description JOBHISTORY::HistoryServer Web UI
servicegroups MAPREDUCE
check_command check_webui!historyserver2!{{ hs_port }}
normal_check_interval 1
retry_check_interval 1
max_check_attempts 3
}
{% if env.system.os_family != "suse" %}
define service {
hostgroup_name historyserver2
use hadoop-service
service_description JOBHISTORY::HistoryServer CPU utilization
servicegroups MAPREDUCE
check_command check_cpu!200%!250%
normal_check_interval 5
retry_check_interval 2
max_check_attempts 5
}
{% endif %}
define service {
hostgroup_name historyserver2
use hadoop-service
service_description JOBHISTORY::HistoryServer RPC latency
servicegroups MAPREDUCE
check_command check_rpcq_latency!JobHistoryServer!{{ hs_port }}!3000!5000!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
normal_check_interval 5
retry_check_interval 1
max_check_attempts 5
}
define service {
hostgroup_name historyserver2
use hadoop-service
service_description JOBHISTORY::HistoryServer process
servicegroups MAPREDUCE
check_command check_tcp_wrapper!{{ hs_port }}!-w 1 -c 1
normal_check_interval 1
retry_check_interval 0.5
max_check_attempts 3
}
{% endif %}
{# JournalNode checks, emitted only when a journalnodes hostgroup exists. The
   per-host TCP check is always emitted; the roll-up on the Nagios server is
   added only when dfs_ha_enabled is truthy, and its quoted argument is the
   service_description of the per-host check. #}
{% if hostgroup_defs['journalnodes'] %}
# Journalnode checks
define service {
hostgroup_name journalnodes
use hadoop-service
service_description JOURNALNODE::JournalNode process
servicegroups HDFS
check_command check_tcp_wrapper!{{ journalnode_port }}!-w 1 -c 1
normal_check_interval 1
retry_check_interval 0.5
max_check_attempts 3
}
{% if dfs_ha_enabled %}
define service {
hostgroup_name nagios-server
use hadoop-service
service_description HDFS::Percent JournalNodes live
servicegroups HDFS
check_command check_aggregate!"JOURNALNODE::JournalNode process"!33%!50%
normal_check_interval 0.5
retry_check_interval 0.25
max_check_attempts 3
}
{% endif %}
{% endif %}
{# Per-host DataNode checks for the slaves hostgroup. Their
   service_description strings are the ones the HDFS roll-up checks on the
   Nagios server pass to check_aggregate, so they must not be renamed on one
   side only.
   NOTE(review): check_datanode_storage is given 90% twice; if these are the
   warning and critical thresholds, the warning level can never be reported on
   its own. Confirm that this is intended. #}
{% if hostgroup_defs['slaves'] %}
# HDFS::DATANODE Checks
define service {
hostgroup_name slaves
use hadoop-service
service_description DATANODE::DataNode process
servicegroups HDFS
check_command check_tcp_wrapper!{{datanode_port}}!-w 1 -c 1
normal_check_interval 1
retry_check_interval 0.5
max_check_attempts 3
}
define service {
hostgroup_name slaves
use hadoop-service
service_description DATANODE::DataNode space
servicegroups HDFS
check_command check_datanode_storage!{{ datanode_port }}!90%!90%!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
normal_check_interval 2
retry_check_interval 1
max_check_attempts 2
}
{% endif %}
{# TCP check of flume_port on the flume-servers hostgroup; emitted only when
   that hostgroup exists. #}
{% if hostgroup_defs['flume-servers'] %}
# FLUME Checks
define service {
hostgroup_name flume-servers
use hadoop-service
service_description FLUME::Flume Agent process
servicegroups FLUME
check_command check_tcp_wrapper!{{ flume_port }}!-w 1 -c 1
normal_check_interval 1
retry_check_interval 0.5
max_check_attempts 3
}
{% endif %}
{# Per-host TCP check of clientPort on the zookeeper-servers hostgroup. Its
   service_description is the string the ZooKeeper roll-up check on the Nagios
   server passes to check_aggregate. #}
{% if hostgroup_defs['zookeeper-servers'] %}
# ZOOKEEPER Checks
define service {
hostgroup_name zookeeper-servers
use hadoop-service
service_description ZOOKEEPER::ZooKeeper Server process
servicegroups ZOOKEEPER
check_command check_tcp_wrapper!{{ clientPort }}!-w 1 -c 1
normal_check_interval 1
retry_check_interval 0.5
max_check_attempts 3
}
{% endif %}
{# HBase checks, emitted only when an hbasemasters hostgroup exists. The
   RegionServer process check targets the region-servers hostgroup and is the
   service the HBase roll-up on the Nagios server aggregates. The loop then
   creates per-host checks for every entry of hbase_master_hosts.
   NOTE(review): the guard tests hbasemasters while the first service uses the
   region-servers hostgroup; confirm that group is always defined here. #}
{% if hostgroup_defs['hbasemasters'] %}
# HBASE::REGIONSERVER Checks
define service {
hostgroup_name region-servers
use hadoop-service
service_description REGIONSERVER::RegionServer process
servicegroups HBASE
check_command check_tcp_wrapper!{{ hbase_rs_port }}!-w 1 -c 1
normal_check_interval 1
retry_check_interval 0.5
max_check_attempts 3
}
{# HBASE:: MASTER Checks
# define service {
# hostgroup_name hbasemasters
# use hadoop-service
# service_description HBASEMASTER::HBase Master Web UI
# servicegroups HBASE
# check_command check_webui!hbase!{{ hbase_master_port }}
# normal_check_interval 1
# retry_check_interval 1
# max_check_attempts 3
# #}
{# Per-master checks; the CPU check is left out when the OS family is "suse" #}
{% for hbasemaster in hbase_master_hosts %}
{% if env.system.os_family != "suse" %}
define service {
host_name {{ hbasemaster }}
use hadoop-service
service_description HBASEMASTER::HBase Master CPU utilization on {{ hbasemaster }}
servicegroups HBASE
check_command check_cpu!200%!250%
normal_check_interval 5
retry_check_interval 2
max_check_attempts 5
}
{% endif %}
define service {
host_name {{ hbasemaster }}
use hadoop-service
service_description HBASEMASTER::HBase Master process on {{ hbasemaster }}
servicegroups HBASE
check_command check_tcp_wrapper!{{ hbase_master_rpc_port }}!-w 1 -c 1
normal_check_interval 0.5
retry_check_interval 0.25
max_check_attempts 4
}
{% endfor %}
{% endif %}
{# Two TCP checks on the hiveserver hostgroup: one against hive_metastore_port
   and one against hive_server_port. Both are emitted only when the hiveserver
   hostgroup exists. #}
{% if hostgroup_defs['hiveserver'] %}
# HIVE Metastore check
define service {
hostgroup_name hiveserver
use hadoop-service
service_description HIVE-METASTORE::Hive Metastore process
servicegroups HIVE
check_command check_tcp_wrapper!{{ hive_metastore_port }}!-w 1 -c 1
normal_check_interval 0.5
retry_check_interval 0.5
max_check_attempts 3
}
# HIVE Server check
define service {
hostgroup_name hiveserver
use hadoop-service
service_description HIVE-SERVER::HiveServer2 process
servicegroups HIVE
check_command check_tcp_wrapper!{{ hive_server_port }}!-w 1 -c 1
normal_check_interval 0.5
retry_check_interval 0.5
max_check_attempts 3
}
{% endif %}
{# Oozie status check on the oozie-server hostgroup. Exactly one check_command
   line is rendered: with security enabled the command gets "true" followed by
   the keytab path, principal name and kinit path; otherwise it gets "false"
   and no Kerberos arguments. #}
{% if hostgroup_defs['oozie-server'] %}
# Oozie check
define service {
hostgroup_name oozie-server
use hadoop-service
service_description OOZIE::Oozie Server status
servicegroups OOZIE
{% if security_enabled %}
check_command check_oozie_status!{{ oozie_server_port }}!{{ java64_home }}!true!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}
{% else %}
check_command check_oozie_status!{{ oozie_server_port }}!{{ java64_home }}!false
{% endif %}
normal_check_interval 1
retry_check_interval 1
max_check_attempts 3
}
{% endif %}
{# WebHCat status check on the webhcat-server hostgroup. Exactly one
   check_command line is rendered, chosen by security_enabled; only the
   secured variant passes the keytab path, principal name and kinit path.
   NOTE(review): in the secured branch the flag is rendered through
   str(security_enabled).lower() whereas the Oozie check hard-codes "true";
   the two agree only if security_enabled stringifies to "True". #}
{% if hostgroup_defs['webhcat-server'] %}
# WEBHCAT check
define service {
hostgroup_name webhcat-server
use hadoop-service
service_description WEBHCAT::WebHCat Server status
servicegroups WEBHCAT
{% if security_enabled %}
check_command check_templeton_status!{{ templeton_port }}!v1!{{ str(security_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}
{% else %}
check_command check_templeton_status!{{ templeton_port }}!v1!false
{% endif %}
normal_check_interval 1
retry_check_interval 0.5
max_check_attempts 3
}
{% endif %}
{# Hue status check on the hue-server hostgroup; check_hue_status is called
   without arguments.
   NOTE(review): normal_check_interval is 100 here while no other service in
   this file uses a value above 5; confirm that this is intended. #}
{% if hostgroup_defs['hue-server'] %}
define service {
hostgroup_name hue-server
use hadoop-service
service_description HUE::Hue Server status
servicegroups HUE
check_command check_hue_status
normal_check_interval 100
retry_check_interval 0.5
max_check_attempts 3
}
{% endif %}
# FALCON checks
{# Process and web UI checks for the falcon-server hostgroup, both against
   falcon_port. They inherit the hadoop-service template like every other
   service in this file; without it they would pick up none of its
   notification, contact-group and event-handler settings. #}
{% if hostgroup_defs['falcon-server'] %}
define service {
        hostgroup_name          falcon-server
        use                     hadoop-service
        service_description     FALCON::Falcon Server process
        servicegroups           FALCON
        check_command           check_tcp_wrapper!{{ falcon_port }}!-w 1 -c 1
        normal_check_interval   1
        retry_check_interval    0.5
        max_check_attempts      3
}
define service {
        hostgroup_name          falcon-server
        use                     hadoop-service
        service_description     FALCON::Falcon Server Web UI
        servicegroups           FALCON
        check_command           check_webui!falconserver!{{ falcon_port }}
        normal_check_interval   1
        retry_check_interval    0.5
        max_check_attempts      3
}
{% endif %}
{# TCP check of ahs_port on the ats-servers hostgroup; emitted only when that
   hostgroup exists. The service is filed under the YARN servicegroup. #}
{% if hostgroup_defs['ats-servers'] %}
define service {
hostgroup_name ats-servers
use hadoop-service
service_description APP_TIMELINE_SERVER::App Timeline Server process
servicegroups YARN
check_command check_tcp_wrapper!{{ ahs_port }}!-w 1 -c 1
normal_check_interval 1
retry_check_interval 0.5
max_check_attempts 3
}
{% endif %}