#
#
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#
# NAGIOS SERVER Check (status log update)
<%if scope.function_hdp_nagios_members_exist('nagios-server')-%>
# Base service template: the checks in this file inherit from it through
# "use hadoop-service". It carries "register 0" and itself inherits from
# generic-service.
define service {
name hadoop-service
use generic-service
notification_options w,u,c,r,f,s
first_notification_delay 0
notification_interval 0 ; Send the notification once
contact_groups admins
notifications_enabled 1
event_handler_enabled 1
register 0
}
# Runs check_nagios on the nagios-server hostgroup against /var/nagios/status.dat;
# the last command argument is filled in from nagios_lookup_daemon_str.
define service {
hostgroup_name nagios-server
use hadoop-service
service_description NAGIOS::Nagios status log freshness
servicegroups NAGIOS
check_command check_nagios!10!/var/nagios/status.dat!<%=scope.function_hdp_template_var("::hdp-nagios::server::config::nagios_lookup_daemon_str")%>
normal_check_interval 5
retry_check_interval 0.5
max_check_attempts 2
}
# NAGIOS SERVER HDFS Checks
# Both checks run on the nagios-server hostgroup through check_aggregate; the
# quoted first argument is the service_description of the per-DataNode check
# being aggregated.
define service {
hostgroup_name nagios-server
use hadoop-service
service_description HDFS::Percent DataNodes with space available
servicegroups HDFS
check_command check_aggregate!"DATANODE::DataNode space"!10%!30%
normal_check_interval 2
retry_check_interval 1
max_check_attempts 1
}
define service {
hostgroup_name nagios-server
use hadoop-service
service_description HDFS::Percent DataNodes live
servicegroups HDFS
check_command check_aggregate!"DATANODE::DataNode process"!10%!30%
normal_check_interval 0.5
retry_check_interval 0.25
max_check_attempts 3
}
<% if scope.function_hdp_nagios_members_exist('namenode') &&
(scope.function_hdp_get_major_stack_version([scope.function_hdp_template_var("stack_version")]) >= 2) &&
(scope.function_hdp_template_var("::hdp::params::dfs_ha_enabled"))%>
# NameNode HA health check. Rendered only when the namenode hostgroup has
# members, the major stack version is >= 2 and dfs_ha_enabled is truthy.
# NOTE(review): a non-nil string such as "false" is truthy in Ruby - confirm
# the type of dfs_ha_enabled.
define service {
hostgroup_name nagios-server
use hadoop-service
service_description HDFS::NameNode HA Healthy
servicegroups HDFS
check_command check_namenodes_ha!$HOSTGROUPMEMBERS:namenode$!<%=scope.function_hdp_template_var("::hdp::namenode_port")%>
normal_check_interval 0.5
retry_check_interval 0.25
max_check_attempts 5
}
<%end-%>
# AMBARI AGENT Checks
# One check_tcp service is emitted per entry of all_hosts; its port is read
# from the same index of ::all_ping_ports (assumes the two lists are parallel
# and equally long - TODO confirm).
<%scope.function_hdp_template_var("all_hosts").each_with_index do |hostname, index|-%>
define service {
host_name <%=hostname%>
use hadoop-service
service_description AMBARI::Ambari Agent process
servicegroups AMBARI
check_command check_tcp!<%=scope.function_hdp_template_var("::all_ping_ports")[index]%>!-w 1 -c 1
normal_check_interval 1
retry_check_interval 0.25
max_check_attempts 4
}
<%end-%>
# NAGIOS SERVER ZOOKEEPER Checks
# Aggregate over the "ZOOKEEPER::ZooKeeper Server process" service; rendered
# only when the zookeeper-servers hostgroup has members.
<%if scope.function_hdp_nagios_members_exist('zookeeper-servers')-%>
define service {
hostgroup_name nagios-server
use hadoop-service
service_description ZOOKEEPER::Percent ZooKeeper Servers live
servicegroups ZOOKEEPER
check_command check_aggregate!"ZOOKEEPER::ZooKeeper Server process"!35%!70%
normal_check_interval 0.5
retry_check_interval 0.25
max_check_attempts 3
}
<%end-%>
# NAGIOS SERVER HBASE Checks
# Aggregate over the "REGIONSERVER::RegionServer process" service.
# NOTE(review): the guard tests the hbasemasters hostgroup, not region-servers.
<%if scope.function_hdp_nagios_members_exist('hbasemasters')-%>
define service {
hostgroup_name nagios-server
use hadoop-service
service_description HBASE::Percent RegionServers live
servicegroups HBASE
check_command check_aggregate!"REGIONSERVER::RegionServer process"!10%!30%
normal_check_interval 0.5
retry_check_interval 0.25
max_check_attempts 3
}
<%end-%>
<%end-%>
# GANGLIA SERVER Checks
<%if scope.function_hdp_nagios_members_exist('ganglia-server')-%>
# All checks below are check_tcp probes on the ganglia-server hostgroup.
# The server, Slaves and NameNode collector checks are rendered whenever the
# ganglia-server hostgroup has members; the JobTracker, HBase Master,
# ResourceManager and HistoryServer collector checks additionally require
# their own hostgroup to have members.
define service {
hostgroup_name ganglia-server
use hadoop-service
service_description GANGLIA::Ganglia Server process
servicegroups GANGLIA
check_command check_tcp!<%=scope.function_hdp_template_var("::hdp::ganglia_port")%>!-w 1 -c 1
normal_check_interval 0.25
retry_check_interval 0.25
max_check_attempts 4
}
define service {
hostgroup_name ganglia-server
use hadoop-service
service_description GANGLIA::Ganglia Monitor process for Slaves
servicegroups GANGLIA
check_command check_tcp!<%=scope.function_hdp_template_var("::hdp::ganglia_collector_slaves_port")%>!-w 1 -c 1
normal_check_interval 0.25
retry_check_interval 0.25
max_check_attempts 4
}
define service {
hostgroup_name ganglia-server
use hadoop-service
service_description GANGLIA::Ganglia Monitor process for NameNode
servicegroups GANGLIA
check_command check_tcp!<%=scope.function_hdp_template_var("::hdp::ganglia_collector_namenode_port")%>!-w 1 -c 1
normal_check_interval 0.25
retry_check_interval 0.25
max_check_attempts 4
}
<%if scope.function_hdp_nagios_members_exist('jobtracker')-%>
define service {
hostgroup_name ganglia-server
use hadoop-service
service_description GANGLIA::Ganglia Monitor process for JobTracker
servicegroups GANGLIA
check_command check_tcp!<%=scope.function_hdp_template_var("::hdp::ganglia_collector_jobtracker_port")%>!-w 1 -c 1
normal_check_interval 0.25
retry_check_interval 0.25
max_check_attempts 4
}
<%end-%>
<%if scope.function_hdp_nagios_members_exist('hbasemasters')-%>
define service {
hostgroup_name ganglia-server
use hadoop-service
service_description GANGLIA::Ganglia Monitor process for HBase Master
servicegroups GANGLIA
check_command check_tcp!<%=scope.function_hdp_template_var("::hdp::ganglia_collector_hbase_port")%>!-w 1 -c 1
normal_check_interval 0.25
retry_check_interval 0.25
max_check_attempts 4
}
<%end-%>
<%if scope.function_hdp_nagios_members_exist('resourcemanager')-%>
define service {
hostgroup_name ganglia-server
use hadoop-service
service_description GANGLIA::Ganglia Monitor process for ResourceManager
servicegroups GANGLIA
check_command check_tcp!<%=scope.function_hdp_template_var("::hdp::ganglia_collector_rm_port")%>!-w 1 -c 1
normal_check_interval 0.25
retry_check_interval 0.25
max_check_attempts 4
}
<%end-%>
<%if scope.function_hdp_nagios_members_exist('historyserver2')-%>
define service {
hostgroup_name ganglia-server
use hadoop-service
service_description GANGLIA::Ganglia Monitor process for HistoryServer
servicegroups GANGLIA
check_command check_tcp!<%=scope.function_hdp_template_var("::hdp::ganglia_collector_hs_port")%>!-w 1 -c 1
normal_check_interval 0.25
retry_check_interval 0.25
max_check_attempts 4
}
<%end-%>
<%end-%>
<%if scope.function_hdp_nagios_members_exist('snamenode')-%>
# Secondary namenode checks
# check_tcp probe on the snamenode hostgroup against snamenode_port.
define service {
hostgroup_name snamenode
use hadoop-service
service_description NAMENODE::Secondary NameNode process
servicegroups HDFS
check_command check_tcp!<%=scope.function_hdp_template_var("snamenode_port")%>!-w 1 -c 1
normal_check_interval 0.5
retry_check_interval 0.25
max_check_attempts 3
}
<%end-%>
<%if scope.function_hdp_nagios_members_exist('namenode')-%>
# HDFS Checks
# The definitions inside this loop are emitted once per host returned for
# ::hdp::params::namenode_host, with that host as host_name and as a suffix of
# each service_description. The CPU check is skipped when hdp_os_type is "suse".
<% scope.function_hdp_template_var("::hdp::params::namenode_host").each do |namenode| -%>
define service {
host_name <%= namenode %>
use hadoop-service
service_description NAMENODE::NameNode edit logs directory status on <%= namenode %>
servicegroups HDFS
check_command check_name_dir_status!<%=scope.function_hdp_template_var("::hdp::namenode_port")%>!<%=scope.function_hdp_template_var("::hdp::params::hadoop_ssl_enabled")%>!<%=scope.function_hdp_template_var("nagios_keytab_path")%>!<%=scope.function_hdp_template_var("nagios_principal_name")%>!<%=scope.function_hdp_template_var("::hdp::params::kinit_path_local")%>!<%=scope.function_hdp_template_var("::hdp::params::security_enabled")%>
normal_check_interval 0.5
retry_check_interval 0.5
max_check_attempts 3
}
<% if scope.function_hdp_template_var("hdp_os_type") != "suse"%>
define service {
host_name <%= namenode %>
use hadoop-service
service_description NAMENODE::NameNode host CPU utilization on <%= namenode %>
servicegroups HDFS
check_command check_cpu!200%!250%
normal_check_interval 5
retry_check_interval 2
max_check_attempts 5
}
<% end %>
define service {
host_name <%= namenode %>
use hadoop-service
service_description NAMENODE::NameNode Web UI on <%= namenode %>
servicegroups HDFS
check_command check_webui!namenode!<%=scope.function_hdp_template_var("::hdp::namenode_port")%>
normal_check_interval 1
retry_check_interval 1
max_check_attempts 3
}
define service {
host_name <%= namenode %>
use hadoop-service
service_description NAMENODE::NameNode process on <%= namenode %>
servicegroups HDFS
check_command check_tcp!<%=scope.function_hdp_template_var("::hdp::namenode_metadata_port")%>!-w 1 -c 1
normal_check_interval 0.5
retry_check_interval 0.25
max_check_attempts 3
}
define service {
host_name <%= namenode %>
use hadoop-service
service_description HDFS::NameNode RPC latency on <%= namenode %>
servicegroups HDFS
check_command check_rpcq_latency!NameNode!<%=scope.function_hdp_template_var("::hdp::namenode_port")%>!3000!5000!<%=scope.function_hdp_template_var("::hdp::params::hadoop_ssl_enabled")%>!<%=scope.function_hdp_template_var("nagios_keytab_path")%>!<%=scope.function_hdp_template_var("nagios_principal_name")%>!<%=scope.function_hdp_template_var("::hdp::params::kinit_path_local")%>!<%=scope.function_hdp_template_var("::hdp::params::security_enabled")%>
normal_check_interval 5
retry_check_interval 1
max_check_attempts 5
}
<% end -%>
# Cluster-wide HDFS checks run on the nagios-server hostgroup; the NameNode
# hosts are passed to the commands through $HOSTGROUPMEMBERS:namenode$.
define service {
hostgroup_name nagios-server
use hadoop-service
service_description HDFS::Blocks health
servicegroups HDFS
check_command check_hdfs_blocks!$HOSTGROUPMEMBERS:namenode$!<%=scope.function_hdp_template_var("::hdp::namenode_port")%>!0%!0%!<%=scope.function_hdp_template_var("::hdp-nagios::params::nn_metrics_property")%>!<%=scope.function_hdp_template_var("::hdp::params::hadoop_ssl_enabled")%>!<%=scope.function_hdp_template_var("nagios_keytab_path")%>!<%=scope.function_hdp_template_var("nagios_principal_name")%>!<%=scope.function_hdp_template_var("::hdp::params::kinit_path_local")%>!<%=scope.function_hdp_template_var("::hdp::params::security_enabled")%>
normal_check_interval 2
retry_check_interval 1
max_check_attempts 1
}
define service {
hostgroup_name nagios-server
use hadoop-service
service_description HDFS::HDFS capacity utilization
servicegroups HDFS
check_command check_hdfs_capacity!$HOSTGROUPMEMBERS:namenode$!<%=scope.function_hdp_template_var("::hdp::namenode_port")%>!80%!90%!<%=scope.function_hdp_template_var("::hdp::params::hadoop_ssl_enabled")%>!<%=scope.function_hdp_template_var("nagios_keytab_path")%>!<%=scope.function_hdp_template_var("nagios_principal_name")%>!<%=scope.function_hdp_template_var("::hdp::params::kinit_path_local")%>!<%=scope.function_hdp_template_var("::hdp::params::security_enabled")%>
normal_check_interval 10
retry_check_interval 1
max_check_attempts 1
}
<%end-%>
# MAPREDUCE Checks
<%if scope.function_hdp_nagios_members_exist('jobtracker')-%>
# JobTracker and job-history web UI checks on the jobtracker hostgroup,
# followed by a CPU check that is skipped when hdp_os_type is "suse".
define service {
hostgroup_name jobtracker
use hadoop-service
service_description JOBTRACKER::JobTracker Web UI
servicegroups MAPREDUCE
check_command check_webui!jobtracker!<%=scope.function_hdp_template_var("::hdp::jtnode_port")%>
normal_check_interval 1
retry_check_interval 1
max_check_attempts 3
}
define service {
hostgroup_name jobtracker
use hadoop-service
service_description JOBTRACKER::HistoryServer Web UI
servicegroups MAPREDUCE
check_command check_webui!jobhistory!<%=scope.function_hdp_template_var("::hdp::jobhistory_port")%>
normal_check_interval 1
retry_check_interval 1
max_check_attempts 3
}
<% if scope.function_hdp_template_var("hdp_os_type") != "suse"%>
define service {
hostgroup_name jobtracker
use hadoop-service
service_description JOBTRACKER::JobTracker CPU utilization
servicegroups MAPREDUCE
check_command check_cpu!200%!250%
normal_check_interval 5
retry_check_interval 2
max_check_attempts 5
}
<% end %>
# check_tcp probe of the JobTracker port (::hdp::jtnode_port) on the
# jobtracker hostgroup.
define service {
hostgroup_name jobtracker
use hadoop-service
service_description JOBTRACKER::JobTracker process
servicegroups MAPREDUCE
check_command check_tcp!<%=scope.function_hdp_template_var("::hdp::jtnode_port")%>!-w 1 -c 1
normal_check_interval 0.5
retry_check_interval 0.25
max_check_attempts 4
}
# check_rpcq_latency for the JobTracker; the trailing arguments carry the SSL
# flag, Nagios keytab path, principal name, kinit path and security flag.
define service {
hostgroup_name jobtracker
use hadoop-service
service_description MAPREDUCE::JobTracker RPC latency
servicegroups MAPREDUCE
check_command check_rpcq_latency!JobTracker!<%=scope.function_hdp_template_var("::hdp::jtnode_port")%>!3000!5000!<%=scope.function_hdp_template_var("::hdp::params::hadoop_ssl_enabled")%>!<%=scope.function_hdp_template_var("nagios_keytab_path")%>!<%=scope.function_hdp_template_var("nagios_principal_name")%>!<%=scope.function_hdp_template_var("::hdp::params::kinit_path_local")%>!<%=scope.function_hdp_template_var("::hdp::params::security_enabled")%>
normal_check_interval 5
retry_check_interval 1
max_check_attempts 5
}
<%end-%>
<%if scope.function_hdp_nagios_members_exist('tasktracker-servers')-%>
# Rendered only when the tasktracker-servers hostgroup has members: one
# aggregate check on nagios-server plus two per-TaskTracker checks.
define service {
hostgroup_name nagios-server
use hadoop-service
service_description MAPREDUCE::Percent TaskTrackers live
servicegroups MAPREDUCE
check_command check_aggregate!"TASKTRACKER::TaskTracker process"!10%!30%
normal_check_interval 0.5
retry_check_interval 0.25
max_check_attempts 3
}
# MAPREDUCE::TASKTRACKER Checks
define service {
hostgroup_name tasktracker-servers
use hadoop-service
service_description TASKTRACKER::TaskTracker process
servicegroups MAPREDUCE
check_command check_tcp!<%=scope.function_hdp_template_var("::hdp::tasktracker_port")%>!-w 1 -c 1
normal_check_interval 1
retry_check_interval 0.5
max_check_attempts 3
}
# MAPREDUCE::TASKTRACKER Mapreduce local dir used space
# The directory argument is looked up with hdp_default (not hdp_template_var)
# from mapred-site/mapred.local.dir.
define service {
hostgroup_name tasktracker-servers
use hadoop-service
service_description TASKTRACKER::MapReduce local dir space
servicegroups MAPREDUCE
check_command check_mapred_local_dir_used_space!<%=scope.function_hdp_default("::hdp::mapred-site/mapred.local.dir")%>!85%
normal_check_interval 0.5
retry_check_interval 0.25
max_check_attempts 3
}
<%end-%>
<%if scope.function_hdp_nagios_members_exist('resourcemanager')-%>
# YARN::RESOURCEMANAGER Checks
# Web UI, RPC latency and TCP process checks on the resourcemanager hostgroup,
# all using ::hdp::rm_port; the CPU check is skipped when hdp_os_type is "suse".
define service {
hostgroup_name resourcemanager
use hadoop-service
service_description RESOURCEMANAGER::ResourceManager Web UI
servicegroups YARN
check_command check_webui!resourcemanager!<%=scope.function_hdp_template_var("::hdp::rm_port")%>
normal_check_interval 1
retry_check_interval 1
max_check_attempts 3
}
<% if scope.function_hdp_template_var("hdp_os_type") != "suse"%>
define service {
hostgroup_name resourcemanager
use hadoop-service
service_description RESOURCEMANAGER::ResourceManager CPU utilization
servicegroups YARN
check_command check_cpu!200%!250%
normal_check_interval 5
retry_check_interval 2
max_check_attempts 5
}
<% end %>
define service {
hostgroup_name resourcemanager
use hadoop-service
service_description RESOURCEMANAGER::ResourceManager RPC latency
servicegroups YARN
check_command check_rpcq_latency!ResourceManager!<%=scope.function_hdp_template_var("::hdp::rm_port")%>!3000!5000!<%=scope.function_hdp_template_var("::hdp::params::hadoop_ssl_enabled")%>!<%=scope.function_hdp_template_var("nagios_keytab_path")%>!<%=scope.function_hdp_template_var("nagios_principal_name")%>!<%=scope.function_hdp_template_var("::hdp::params::kinit_path_local")%>!<%=scope.function_hdp_template_var("::hdp::params::security_enabled")%>
normal_check_interval 5
retry_check_interval 1
max_check_attempts 5
}
define service {
hostgroup_name resourcemanager
use hadoop-service
service_description RESOURCEMANAGER::ResourceManager process
servicegroups YARN
check_command check_tcp!<%=scope.function_hdp_template_var("::hdp::rm_port")%>!-w 1 -c 1
normal_check_interval 1
retry_check_interval 0.5
max_check_attempts 3
}
<% end %>
<%if scope.function_hdp_nagios_members_exist('nodemanagers')-%>
# YARN::NODEMANAGER Checks
# Process and health checks on the nodemanagers hostgroup (both use
# ::hdp::nm_port), plus an aggregate "percent live" check on nagios-server.
# NOTE(review): the health check looks up "kinit_path_local" unqualified,
# while the RPC latency checks in this file use
# "::hdp::params::kinit_path_local" - confirm both resolve to the same value.
define service {
hostgroup_name nodemanagers
use hadoop-service
service_description NODEMANAGER::NodeManager process
servicegroups YARN
check_command check_tcp!<%=scope.function_hdp_template_var("::hdp::nm_port")%>!-w 1 -c 1
normal_check_interval 1
retry_check_interval 0.5
max_check_attempts 3
}
define service {
hostgroup_name nodemanagers
use hadoop-service
service_description NODEMANAGER::NodeManager health
servicegroups YARN
check_command check_nodemanager_health!<%=scope.function_hdp_template_var("::hdp::nm_port")%>!<%=scope.function_hdp_template_var("::hdp::params::security_enabled")%>!<%=scope.function_hdp_template_var("nagios_keytab_path")%>!<%=scope.function_hdp_template_var("nagios_principal_name")%>!<%=scope.function_hdp_template_var("kinit_path_local")%>
normal_check_interval 1
retry_check_interval 1
max_check_attempts 3
}
define service {
hostgroup_name nagios-server
use hadoop-service
service_description NODEMANAGER::Percent NodeManagers live
servicegroups YARN
check_command check_aggregate!"NODEMANAGER::NodeManager process"!10%!30%
normal_check_interval 0.5
retry_check_interval 0.25
max_check_attempts 3
}
<% end %>
<%if scope.function_hdp_nagios_members_exist('historyserver2')-%>
# MAPREDUCE::JOBHISTORY Checks
# Web UI, RPC latency and TCP process checks on the historyserver2 hostgroup,
# all using ::hdp::hs_port; the CPU check is skipped when hdp_os_type is "suse".
define service {
hostgroup_name historyserver2
use hadoop-service
service_description JOBHISTORY::HistoryServer Web UI
servicegroups MAPREDUCE
check_command check_webui!historyserver2!<%=scope.function_hdp_template_var("::hdp::hs_port")%>
normal_check_interval 1
retry_check_interval 1
max_check_attempts 3
}
<% if scope.function_hdp_template_var("hdp_os_type") != "suse"%>
define service {
hostgroup_name historyserver2
use hadoop-service
service_description JOBHISTORY::HistoryServer CPU utilization
servicegroups MAPREDUCE
check_command check_cpu!200%!250%
normal_check_interval 5
retry_check_interval 2
max_check_attempts 5
}
<% end %>
define service {
hostgroup_name historyserver2
use hadoop-service
service_description JOBHISTORY::HistoryServer RPC latency
servicegroups MAPREDUCE
check_command check_rpcq_latency!JobHistoryServer!<%=scope.function_hdp_template_var("::hdp::hs_port")%>!3000!5000!<%=scope.function_hdp_template_var("::hdp::params::hadoop_ssl_enabled")%>!<%=scope.function_hdp_template_var("nagios_keytab_path")%>!<%=scope.function_hdp_template_var("nagios_principal_name")%>!<%=scope.function_hdp_template_var("::hdp::params::kinit_path_local")%>!<%=scope.function_hdp_template_var("::hdp::params::security_enabled")%>
normal_check_interval 5
retry_check_interval 1
max_check_attempts 5
}
define service {
hostgroup_name historyserver2
use hadoop-service
service_description JOBHISTORY::HistoryServer process
servicegroups MAPREDUCE
check_command check_tcp!<%=scope.function_hdp_template_var("::hdp::hs_port")%>!-w 1 -c 1
normal_check_interval 1
retry_check_interval 0.5
max_check_attempts 3
}
<% end %>
<%if scope.function_hdp_nagios_members_exist('journalnodes')-%>
# Journalnode checks
# Per-host check_tcp probe on the journalnodes hostgroup; the aggregate
# "percent live" check on nagios-server is added only when dfs_ha_enabled is
# truthy.
define service {
hostgroup_name journalnodes
use hadoop-service
service_description JOURNALNODE::JournalNode process
servicegroups HDFS
check_command check_tcp!<%=scope.function_hdp_template_var("::hdp::journalnode_port")%>!-w 1 -c 1
normal_check_interval 1
retry_check_interval 0.5
max_check_attempts 3
}
<%if scope.function_hdp_template_var("::hdp::params::dfs_ha_enabled")-%>
define service {
hostgroup_name nagios-server
use hadoop-service
service_description HDFS::Percent JournalNodes live
servicegroups HDFS
check_command check_aggregate!"JOURNALNODE::JournalNode process"!33%!50%
normal_check_interval 0.5
retry_check_interval 0.25
max_check_attempts 3
}
<%end-%>
<%end-%>
<%if scope.function_hdp_nagios_members_exist('slaves')-%>
# HDFS::DATANODE Checks
# Process (check_tcp) and storage (check_datanode_storage) checks on the
# slaves hostgroup, both using ::hdp::datanode_port. Their service
# descriptions are the ones the nagios-server check_aggregate checks quote.
define service {
hostgroup_name slaves
use hadoop-service
service_description DATANODE::DataNode process
servicegroups HDFS
check_command check_tcp!<%=scope.function_hdp_template_var("::hdp::datanode_port")%>!-w 1 -c 1
normal_check_interval 1
retry_check_interval 0.5
max_check_attempts 3
}
define service {
hostgroup_name slaves
use hadoop-service
service_description DATANODE::DataNode space
servicegroups HDFS
check_command check_datanode_storage!<%=scope.function_hdp_template_var("::hdp::datanode_port")%>!90%!90%!<%=scope.function_hdp_template_var("::hdp::params::hadoop_ssl_enabled")%>!<%=scope.function_hdp_template_var("nagios_keytab_path")%>!<%=scope.function_hdp_template_var("nagios_principal_name")%>!<%=scope.function_hdp_template_var("::hdp::params::kinit_path_local")%>!<%=scope.function_hdp_template_var("::hdp::params::security_enabled")%>
normal_check_interval 5
retry_check_interval 1
max_check_attempts 2
}
<%end-%>
<%if scope.function_hdp_nagios_members_exist('flume-servers')-%>
# FLUME Checks
# check_tcp probe on the flume-servers hostgroup against flume_port.
define service {
hostgroup_name flume-servers
use hadoop-service
service_description FLUME::Flume Agent process
servicegroups FLUME
check_command check_tcp!<%=scope.function_hdp_template_var("flume_port")%>!-w 1 -c 1
normal_check_interval 1
retry_check_interval 0.5
max_check_attempts 3
}
<%end-%>
<%if scope.function_hdp_nagios_members_exist('zookeeper-servers')-%>
# ZOOKEEPER Checks
# check_tcp probe on the zookeeper-servers hostgroup against ::clientPort.
define service {
hostgroup_name zookeeper-servers
use hadoop-service
service_description ZOOKEEPER::ZooKeeper Server process
servicegroups ZOOKEEPER
check_command check_tcp!<%=scope.function_hdp_template_var("::clientPort")%>!-w 1 -c 1
normal_check_interval 1
retry_check_interval 0.5
max_check_attempts 3
}
<%end-%>
<%if scope.function_hdp_nagios_members_exist('hbasemasters')-%>
# HBASE::REGIONSERVER Checks
# NOTE(review): this check targets the region-servers hostgroup but is guarded
# by the hbasemasters hostgroup - confirm region-servers always exists when
# hbasemasters does.
define service {
hostgroup_name region-servers
use hadoop-service
service_description REGIONSERVER::RegionServer process
servicegroups HBASE
check_command check_tcp!<%=scope.function_hdp_template_var("::hdp::hbase_rs_port")%>!-w 1 -c 1
normal_check_interval 1
retry_check_interval 0.5
max_check_attempts 3
}
# HBASE::MASTER Checks
# Web UI and TCP process checks on the hbasemasters hostgroup, both using
# ::hdp::hbase_master_port; the CPU check is skipped when hdp_os_type is "suse".
define service {
hostgroup_name hbasemasters
use hadoop-service
service_description HBASEMASTER::HBase Master Web UI
servicegroups HBASE
check_command check_webui!hbase!<%=scope.function_hdp_template_var("::hdp::hbase_master_port")%>
normal_check_interval 1
retry_check_interval 1
max_check_attempts 3
}
<% if scope.function_hdp_template_var("hdp_os_type") != "suse"%>
define service {
hostgroup_name hbasemasters
use hadoop-service
service_description HBASEMASTER::HBase Master CPU utilization
servicegroups HBASE
check_command check_cpu!200%!250%
normal_check_interval 5
retry_check_interval 2
max_check_attempts 5
}
<% end %>
define service {
hostgroup_name hbasemasters
use hadoop-service
service_description HBASEMASTER::HBase Master process
servicegroups HBASE
check_command check_tcp!<%=scope.function_hdp_template_var("::hdp::hbase_master_port")%>!-w 1 -c 1
normal_check_interval 0.5
retry_check_interval 0.25
max_check_attempts 4
}
<%end-%>
<%if scope.function_hdp_nagios_members_exist('hiveserver')-%>
# HIVE Metastore check
# Exactly one check_command line is rendered: when security_enabled is truthy
# it passes "true" plus the Nagios keytab path, principal name and kinit path;
# otherwise it passes "false" with no Kerberos arguments.
define service {
hostgroup_name hiveserver
use hadoop-service
service_description HIVE-METASTORE::Hive Metastore status
servicegroups HIVE-METASTORE
<%if scope.function_hdp_template_var("::hdp::params::security_enabled")-%>
check_command check_hive_metastore_status!<%=scope.function_hdp_template_var("::hive_metastore_port")%>!<%=scope.function_hdp_template_var("java64_home")%>!true!<%=scope.function_hdp_template_var("nagios_keytab_path")%>!<%=scope.function_hdp_template_var("nagios_principal_name")%>!<%=scope.function_hdp_template_var("kinit_path_local")%>
<%else-%>
check_command check_hive_metastore_status!<%=scope.function_hdp_template_var("::hive_metastore_port")%>!<%=scope.function_hdp_template_var("java64_home")%>!false
<%end-%>
normal_check_interval 0.5
retry_check_interval 0.5
max_check_attempts 3
}
<%end-%>
<%if scope.function_hdp_nagios_members_exist('oozie-server')-%>
# Oozie check
# Exactly one check_command line is rendered: when security_enabled is truthy
# it passes "true" plus the Nagios keytab path, principal name and kinit path;
# otherwise it passes "false" with no Kerberos arguments.
define service {
hostgroup_name oozie-server
use hadoop-service
service_description OOZIE::Oozie Server status
servicegroups OOZIE
<%if scope.function_hdp_template_var("::hdp::params::security_enabled")-%>
check_command check_oozie_status!<%=scope.function_hdp_template_var("::hdp::oozie_server_port")%>!<%=scope.function_hdp_template_var("java64_home")%>!true!<%=scope.function_hdp_template_var("nagios_keytab_path")%>!<%=scope.function_hdp_template_var("nagios_principal_name")%>!<%=scope.function_hdp_template_var("kinit_path_local")%>
<%else-%>
check_command check_oozie_status!<%=scope.function_hdp_template_var("::hdp::oozie_server_port")%>!<%=scope.function_hdp_template_var("java64_home")%>!false
<%end-%>
normal_check_interval 1
retry_check_interval 1
max_check_attempts 3
}
<%end-%>
<%if scope.function_hdp_nagios_members_exist('webhcat-server')-%>
# WEBHCAT check
# Exactly one check_command line is rendered. In the secured branch the
# security flag is interpolated from security_enabled (rather than a literal
# "true") and is followed by the keytab path, principal name and kinit path;
# the unsecured branch passes a literal "false".
define service {
hostgroup_name webhcat-server
use hadoop-service
service_description WEBHCAT::WebHCat Server status
servicegroups WEBHCAT
<%if scope.function_hdp_template_var("::hdp::params::security_enabled")-%>
check_command check_templeton_status!<%=scope.function_hdp_template_var("::hdp::templeton_port")%>!v1!<%=scope.function_hdp_template_var("::hdp::params::security_enabled")%>!<%=scope.function_hdp_template_var("nagios_keytab_path")%>!<%=scope.function_hdp_template_var("nagios_principal_name")%>!<%=scope.function_hdp_template_var("kinit_path_local")%>
<%else-%>
check_command check_templeton_status!<%=scope.function_hdp_template_var("::hdp::templeton_port")%>!v1!false
<%end-%>
normal_check_interval 1
retry_check_interval 0.5
max_check_attempts 3
}
<%end-%>
<%if scope.function_hdp_nagios_members_exist('hue-server')-%>
# HUE check
# Runs check_hue_status (no arguments) on the hue-server hostgroup.
# NOTE(review): normal_check_interval is 100 here, far larger than any other
# check in this file - confirm this is intentional.
define service {
hostgroup_name hue-server
use hadoop-service
service_description HUE::Hue Server status
servicegroups HUE
check_command check_hue_status
normal_check_interval 100
retry_check_interval 0.5
max_check_attempts 3
}
<%end-%>