# Nagios service definitions for the Hadoop cluster (Jinja template).
# Host liveness: runs the check_host command for every member of the
# hadoop-cluster hostgroup. Settings not listed here are inherited from the
# generic-hadoop-service template.
define service {
    hostgroup_name          hadoop-cluster
    service_description     0 is alive
    check_command           check_host
    use                     generic-hadoop-service
    notification_interval   0 ; set > 0 if you want to be renotified
}

# NTP status: runs check_ntp_time on every hadoop-cluster host.
# The restart-service event handler is called with the argument "ntp".
define service {
    hostgroup_name          hadoop-cluster
    service_description     NTP status
    check_command           check_ntp_time
    use                     generic-hadoop-service
    notification_interval   0 ; set > 0 if you want to be renotified
    event_handler           restart-service!ntp
}

# SSH availability: runs check_ssh on every hadoop-cluster host.
define service {
    hostgroup_name          hadoop-cluster
    service_description     ssh service
    check_command           check_ssh
    use                     generic-hadoop-service
    notification_interval   0 ; set > 0 if you want to be renotified
}

# System load: runs the load_average command on every hadoop-cluster host.
define service {
    hostgroup_name          hadoop-cluster
    service_description     load average
    check_command           load_average
    use                     generic-hadoop-service
    notification_interval   0 ; set > 0 if you want to be renotified
}

# Logged-in users: runs the users command on every hadoop-cluster host.
define service {
    hostgroup_name          hadoop-cluster
    service_description     users
    check_command           users
    use                     generic-hadoop-service
    notification_interval   0 ; set > 0 if you want to be renotified
}

# Process count: runs the processes command on every hadoop-cluster host.
define service {
    hostgroup_name          hadoop-cluster
    service_description     processes num.
    check_command           processes
    use                     generic-hadoop-service
    notification_interval   0 ; set > 0 if you want to be renotified
}

# Zombie processes: runs the zombie_processes command on every
# hadoop-cluster host.
define service {
    hostgroup_name          hadoop-cluster
    service_description     zombie processes
    check_command           zombie_processes
    use                     generic-hadoop-service
    notification_interval   0 ; set > 0 if you want to be renotified
}

# Network interfaces: runs the network_interfaces command on every
# hadoop-cluster host and notifies the hadoop-managers and hadoop-users
# contact groups. notes_url links to the plugin's Nagios Exchange page.
define service {
    hostgroup_name          hadoop-cluster
    service_description     Network interfaces status
    check_command           network_interfaces
    use                     generic-hadoop-service
    notification_interval   0 ; set > 0 if you want to be renotified
    contact_groups          hadoop-managers,hadoop-users
    notes_url               http://exchange.nagios.org/directory/Plugins/Network-Protocols/SNMP/Advanced-Network-Interface-Check--2D-check_netint--2F-check_snmp_netint/details
}

# Root filesystem: runs check_root_disk on every hadoop-cluster host.
define service {
    hostgroup_name          hadoop-cluster
    service_description     root disk
    check_command           check_root_disk
    use                     generic-hadoop-service
    notification_interval   0 ; set > 0 if you want to be renotified
}

# Data disks: runs check_data_disk on the hadoop-worker-nodes hostgroup only
# and notifies the hadoop-managers and hadoop-users contact groups.
define service {
    hostgroup_name          hadoop-worker-nodes
    service_description     data disk
    check_command           check_data_disk
    use                     generic-hadoop-service
    notification_interval   0 ; set > 0 if you want to be renotified
    contact_groups          hadoop-managers,hadoop-users
}

# Ganglia gmond collector: runs check_gmond on the hadoop-cluster-metrics
# hostgroup and notifies the hadoop-managers and hadoop-users contact groups.
define service {
    hostgroup_name          hadoop-cluster-metrics
    service_description     ganglia gmond collector
    check_command           check_gmond
    use                     generic-hadoop-service
    notification_interval   0 ; set > 0 if you want to be renotified
    contact_groups          hadoop-managers,hadoop-users
}

# Basic services: runs check_system_pp on every hadoop-cluster host.
# NOTE(review): what check_system_pp verifies is defined with the command,
# not here; confirm against the command definition.
define service {
    hostgroup_name          hadoop-cluster
    service_description     basic services
    check_command           check_system_pp
    use                     generic-hadoop-service
    notification_interval   0 ; set > 0 if you want to be renotified
}

# Web interfaces: Jobtracker
# Runs check_webui with the argument "jobtracker" on the mapred-jobtracker
# hostgroup and notifies the hadoop-managers and hadoop-users contact groups.
define service {
    hostgroup_name          mapred-jobtracker
    service_description     Jobtracker web interface
    check_command           check_webui!jobtracker
    use                     generic-hadoop-service
    notification_interval   0 ; set > 0 if you want to be renotified
    contact_groups          hadoop-managers,hadoop-users
}

# Web interfaces: Jobtracker (HA)
# Same service description as the mapred-jobtracker check, but applied to the
# mapred-jobtracker-ha hostgroup with the check_webui argument "jobtracker_ha".
define service {
    hostgroup_name          mapred-jobtracker-ha
    service_description     Jobtracker web interface
    check_command           check_webui!jobtracker_ha
    use                     generic-hadoop-service
    notification_interval   0 ; set > 0 if you want to be renotified
    contact_groups          hadoop-managers,hadoop-users
}

# Web interfaces: HDFS Namenode
# Runs check_webui with the argument "namenode" on the hdfs-namenode
# hostgroup and notifies the hadoop-managers and hadoop-users contact groups.
define service {
    hostgroup_name          hdfs-namenode
    service_description     Namenode web interface
    check_command           check_webui!namenode
    use                     generic-hadoop-service
    notification_interval   0 ; set > 0 if you want to be renotified
    contact_groups          hadoop-managers,hadoop-users
}

# Web interfaces: HBase master
# Runs check_webui with the argument "hbase" on the hbase-master hostgroup
# and notifies the hadoop-managers and hadoop-users contact groups.
define service {
    hostgroup_name          hbase-master
    service_description     Hbase master web interface
    check_command           check_webui!hbase
    use                     generic-hadoop-service
    notification_interval   0 ; set > 0 if you want to be renotified
    contact_groups          hadoop-managers,hadoop-users
}

# HBASE status
# Runs hadoop_check_hbase_status on the hbase-master hostgroup.
# contact_groups is deliberately left commented out, so contacts come from
# the generic-hadoop-service template.
define service {
    hostgroup_name          hbase-master
    service_description     Hbase status
    check_command           hadoop_check_hbase_status
    use                     generic-hadoop-service
    notification_interval   0 ; set > 0 if you want to be renotified
    # contact_groups hadoop-managers,hadoop-users
}

# Map Reduce task trackers
# Runs hadoop_check_tasktracker on the mapred-jobtracker hostgroup.
# The restart-service event handler is called with the argument
# "hadoop-0.20-mapreduce-tasktracker".
define service {
    hostgroup_name          mapred-jobtracker
    service_description     Mapreduce tasktrackers status
    check_command           hadoop_check_tasktracker
    use                     generic-hadoop-service
    notification_interval   0 ; set > 0 if you want to be renotified
    event_handler           restart-service!hadoop-0.20-mapreduce-tasktracker
    contact_groups          hadoop-managers,hadoop-users
}

# HDFS datanodes
# Runs hadoop_check_datanode on the hdfs-namenode hostgroup and notifies the
# hadoop-managers and hadoop-users contact groups.
define service {
    hostgroup_name          hdfs-namenode
    service_description     HDFS datanodes status
    check_command           hadoop_check_datanode
    use                     generic-hadoop-service
    notification_interval   0 ; set > 0 if you want to be renotified
    contact_groups          hadoop-managers,hadoop-users
}

# HDFS blocks status
# Runs check_hdfs_blocks on the hdfs-namenode hostgroup. The first argument is
# the hdfs_nn_http_port template variable; the remaining two are fixed at 1.
# NOTE(review): the 1!1 pair is presumably warning/critical thresholds;
# confirm against the check_hdfs_blocks command definition.
define service {
    hostgroup_name          hdfs-namenode
    service_description     HDFS blocks status
    check_command           check_hdfs_blocks!{{ hdfs_nn_http_port }}!1!1
    use                     generic-hadoop-service
    notification_interval   0 ; set > 0 if you want to be renotified
    contact_groups          hadoop-managers,hadoop-users
}

# HDFS capacity
# Runs check_hdfs_capacity on the hdfs-namenode hostgroup. All three arguments
# come from template variables: hdfs_nn_http_port, hdfs_warn and hdfs_crit.
define service {
    hostgroup_name          hdfs-namenode
    service_description     HDFS capacity
    check_command           check_hdfs_capacity!{{ hdfs_nn_http_port }}!{{ hdfs_warn }}!{{ hdfs_crit }}
    use                     generic-hadoop-service
    notification_interval   0 ; set > 0 if you want to be renotified
    contact_groups          hadoop-managers,hadoop-users
}

# HDFS rpc queue latency
# Runs check_rpcq_latency on the hdfs-namenode hostgroup with the
# hdfs_nn_http_port template variable, the literal "NameNode", and 3 and 5.
# NOTE(review): 3!5 is presumably warning/critical; confirm against the
# check_rpcq_latency command definition.
define service {
    hostgroup_name          hdfs-namenode
    service_description     HDFS RPC queue latency
    check_command           check_rpcq_latency!{{ hdfs_nn_http_port }}!NameNode!3!5
    use                     generic-hadoop-service
    notification_interval   0 ; set > 0 if you want to be renotified
}

# HDFS namenode directories status
# Runs check_name_dir_status on the hdfs-namenode hostgroup, passing the
# hdfs_nn_http_port template variable as its only argument.
define service {
    hostgroup_name          hdfs-namenode
    service_description     HDFS namenode directories status
    check_command           check_name_dir_status!{{ hdfs_nn_http_port }}
    use                     generic-hadoop-service
    notification_interval   0 ; set > 0 if you want to be renotified
}

# HDFS journal nodes
# Runs check_hadoop_http_service on the hdfs-journal hostgroup, passing the
# hdfs_journal_http_port template variable, and notifies the hadoop-managers
# and hadoop-users contact groups.
define service {
    hostgroup_name          hdfs-journal
    service_description     HDFS HA journal
    check_command           check_hadoop_http_service!{{ hdfs_journal_http_port }}
    use                     generic-hadoop-service
    notification_interval   0 ; set > 0 if you want to be renotified
    contact_groups          hadoop-managers,hadoop-users
}

# HBase thrift
# Runs check_hbase_thrift on the hbase-thrift hostgroup, passing the
# hbase_thrift_port template variable, and notifies the hadoop-managers and
# hadoop-users contact groups.
define service {
    hostgroup_name          hbase-thrift
    service_description     HBase thrift
    check_command           check_hbase_thrift!{{ hbase_thrift_port }}
    use                     generic-hadoop-service
    notification_interval   0 ; set > 0 if you want to be renotified
    contact_groups          hadoop-managers,hadoop-users
}

# Zookeeper data #

# Server state (the warning and critical values are mandatory but not used)
# Runs check_zookeeper for the zk_server_state key on the zookeeper hostgroup;
# the trailing 1!1 only fills the mandatory threshold arguments.
define service {
    hostgroup_name          zookeeper
    service_description     Zookeeper server state
    check_command           check_zookeeper!zk_server_state!1!1
    use                     generic-hadoop-service
    notification_interval   0 ; set > 0 if you want to be renotified
    contact_groups          hadoop-managers,hadoop-users
}

# Outstanding requests
# Runs check_zookeeper for the zk_outstanding_requests key on the zookeeper
# hostgroup, with thresholds 20 and 50 (presumably warning/critical; confirm).
define service {
    hostgroup_name          zookeeper
    service_description     Zookeeper outstanding requests
    check_command           check_zookeeper!zk_outstanding_requests!20!50
    use                     generic-hadoop-service
    notification_interval   0 ; set > 0 if you want to be renotified
}

# Average latency
# Runs check_zookeeper for the zk_avg_latency key on the zookeeper hostgroup,
# with thresholds 100 and 500 (presumably warning/critical; confirm).
define service {
    hostgroup_name          zookeeper
    service_description     Zookeeper average latency
    check_command           check_zookeeper!zk_avg_latency!100!500
    use                     generic-hadoop-service
    notification_interval   0 ; set > 0 if you want to be renotified
}

# Ephemerals count
# Runs check_zookeeper for the zk_ephemerals_count key on the zookeeper
# hostgroup, with thresholds 3000 and 5000 (presumably warning/critical;
# confirm).
define service {
    hostgroup_name          zookeeper
    service_description     Zookeeper ephemerals
    check_command           check_zookeeper!zk_ephemerals_count!3000!5000
    use                     generic-hadoop-service
    notification_interval   0 ; set > 0 if you want to be renotified
}

# Watch count
# Runs check_zookeeper for the zk_watch_count key on the zookeeper hostgroup,
# with thresholds 1000 and 2000 (presumably warning/critical; confirm).
define service {
    hostgroup_name          zookeeper
    service_description     Zookeeper watch count
    check_command           check_zookeeper!zk_watch_count!1000!2000
    use                     generic-hadoop-service
    notification_interval   0 ; set > 0 if you want to be renotified
}

# Open file descriptors
# Runs check_zookeeper for the zk_open_file_descriptor_count key on the
# zookeeper hostgroup, with thresholds 800 and 950 (presumably
# warning/critical; confirm).
define service {
    hostgroup_name          zookeeper
    service_description     Zookeeper open file descriptors
    check_command           check_zookeeper!zk_open_file_descriptor_count!800!950
    use                     generic-hadoop-service
    notification_interval   0 ; set > 0 if you want to be renotified
}

# HBASE regionservers health status
# Runs check_hadoop_http_service against the fixed port 60030 on the
# hbase-region-servers hostgroup and notifies the hadoop-managers and
# hadoop-users contact groups.
define service {
    hostgroup_name          hbase-region-servers
    service_description     HBASE regionserver health status
    check_command           check_hadoop_http_service!60030
    use                     generic-hadoop-service
    notification_interval   0 ; set > 0 if you want to be renotified
    contact_groups          hadoop-managers,hadoop-users
}

# HDFS datanodes health status
# Runs check_hadoop_http_service against the fixed port 50075 on the
# hdfs-datanodes hostgroup and notifies the hadoop-managers and hadoop-users
# contact groups.
define service {
    hostgroup_name          hdfs-datanodes
    service_description     HDFS datanode health status
    check_command           check_hadoop_http_service!50075
    use                     generic-hadoop-service
    notification_interval   0 ; set > 0 if you want to be renotified
    contact_groups          hadoop-managers,hadoop-users
}

# Map Reduce tasktrackers health status
# Runs check_hadoop_http_service against the fixed port 50060 on the
# mapred-tasktrackers hostgroup and notifies the hadoop-managers and
# hadoop-users contact groups.
define service {
    hostgroup_name          mapred-tasktrackers
    service_description     Map Reduce tasktracker health status
    check_command           check_hadoop_http_service!50060
    use                     generic-hadoop-service
    notification_interval   0 ; set > 0 if you want to be renotified
    contact_groups          hadoop-managers,hadoop-users
}

# HUE interface
# Runs check_hadoop_http_service against the fixed port 8888 on the hue
# hostgroup and notifies the hadoop-managers and hadoop-users contact groups.
define service {
    hostgroup_name          hue
    service_description     Hue interface
    check_command           check_hadoop_http_service!8888
    use                     generic-hadoop-service
    notification_interval   0 ; set > 0 if you want to be renotified
    contact_groups          hadoop-managers,hadoop-users
}