update nagios scripts with new paths
Stephen Soltesz [Fri, 18 Jun 2010 21:55:13 +0000 (21:55 +0000)]
add monitor-nagios package to spec file
remove pcucontrol from setup.py

12 files changed:
Monitor.spec
commands/checkmode.py [deleted file]
commands/checkpcu.py [deleted file]
commands/escalation.py [deleted file]
commands/mail.py [deleted file]
commands/repair.py [deleted file]
nagios/plc_hosts_to_nagios.py
nagios/plc_users_to_nagios.py
setup.py
tools/nagiosobjects.py [deleted file]
tools/plc_hosts_to_nagios.py [deleted file]
tools/plc_users_to_nagios.py [deleted file]

index 22dfd7e..a26bd83 100644 (file)
@@ -35,6 +35,34 @@ system, syncing the PLC db with the monitoring database, notifying users,
 interacting with PCU hardware, applying penalties to sites that violate
 acceptable use.
 
+######################################## NAGIOS
+
+%package nagios
+Summary: Monitor integration with Nagios
+Group: Applications/System
+
+Requires: coreutils
+Requires: passwd
+Requires: gd
+Requires: gd-devel
+Requires: mysql
+Requires: mysql-server
+Requires: mysql-devel
+Requires: mysql-libs
+Requires: mailx
+
+Requires: nagios
+Requires: nagios-common
+Requires: nagios-devel
+Requires: nagios-plugins-all
+Requires: ndoutils
+Requires: ndoutils-mysql
+
+
+%description nagios
+Scripts and setup necessary to integrate and monitor PLC with Nagios.
+Best suited to F12 or above.
+
 ######################################## CLIENT
 
 %package client
@@ -128,6 +156,8 @@ install -d $RPM_BUILD_ROOT/%{python_sitearch}/monitor
 install -D -m 644 monitor.functions $RPM_BUILD_ROOT/%{_sysconfdir}/plc.d/monitor.functions
 install -D -m 755 monitor-server.init $RPM_BUILD_ROOT/%{_sysconfdir}/plc.d/monitor
 install -D -m 755 zabbix/monitor-zabbix.init $RPM_BUILD_ROOT/%{_sysconfdir}/plc.d/zabbix
+# TODO: update with a real init file
+install -D -m 755 monitor-server.init $RPM_BUILD_ROOT/%{_sysconfdir}/plc.d/monitor-nagios
 
 # cron job for automated polling
 install -D -m 644 monitor-server.cron $RPM_BUILD_ROOT/%{_sysconfdir}/cron.d/monitor-server.cron
@@ -170,6 +200,11 @@ rm -rf $RPM_BUILD_ROOT
 %files server-deps
 /var/log/server-deps.log
 
+%files nagios
+%defattr(-,root,root)
+%{_sysconfdir}/plc.d/monitor-nagios
+#/usr/share/%{name}/nagios # TODO: not sure how this will impact the server files
+
 %files server
 %defattr(-,root,root)
 #%config /usr/share/%{name}/monitorconfig.py
@@ -184,6 +219,7 @@ rm -rf $RPM_BUILD_ROOT
 %{_sysconfdir}/httpd/conf.d
 %{python_sitearch}
 
+
 %files client
 %defattr(-,root,root)
 #%{_initrddir}/monitor
@@ -194,6 +230,7 @@ rm -rf $RPM_BUILD_ROOT
 /usr/bin/RunlevelAgent.py*
 /%{_initrddir}/monitor-runlevelagent
 
+
 %post server-deps
 #
 # TODO: depend on distribution packages where feasible.
@@ -248,6 +285,9 @@ if ! plc-config --category plc_zabbix --variable ip ; then
                        --save /etc/planetlab/configs/site.xml /etc/planetlab/configs/site.xml 
 fi
 
+%post nagios
+# TODO: do as much as possible to get the host set up and running.
+
 %post server
 # TODO: this will be nice when we have a web-based service running, such as
 #              an API server or so on.
diff --git a/commands/checkmode.py b/commands/checkmode.py
deleted file mode 100755 (executable)
index 2be4198..0000000
+++ /dev/null
@@ -1,66 +0,0 @@
-#!/usr/bin/python
-
-import time
-import sys
-import os
-
-from monitor.wrapper import plc
-
-def argv_to_dict(argv):
-       """
-               NOTE: very bare-bones, no error checking, will fail easily.
-       """
-       d = {}
-       prev=None
-       for a in argv:
-               if "--" == a[0:2]:
-                       prev = a[2:]
-               elif "-" == a[0:1]:
-                       prev = a[1:]
-               else:
-                       d[prev] = a
-       return d
-
-def main():
-       d = argv_to_dict(sys.argv[1:])
-
-       api = plc.api
-       if 'hostname' in d or 'H' in d:
-               try:
-                       hostname = d['host']
-               except:
-                       hostname = d['H']
-       else:
-               print "UNKNOWN: argument error"
-               sys.exit(3)
-
-       try:
-               n = api.GetNodes(hostname)[0]
-       except:
-               print "UNKNOWN: API failure"
-               sys.exit(3)
-
-       if n['last_contact']:
-               t1 = n['last_contact']
-       else:
-               t1 = 0
-       t2 = time.time()
-       #print n['boot_state'], n['run_level'], t1, t2, t2-t1
-
-       if t2-t1 < 60*60*30:
-               if n['boot_state'] == n['run_level']:
-                       print "OK: bootstate matches runlevel and lastcontact is up to date"
-                       sys.exit(0)
-               else:
-                       print "WARNING: bootstate does not match runlevel"
-                       sys.exit(1)
-       else:
-               print "CRITICAL: node last_contact is stale, assumed offline"
-               sys.exit(2)
-
-
-if __name__ == '__main__':
-       f = open("/tmp/checkmode", 'a')
-       f.write("checkmode %s %s\n" % (time.time(), " ".join(sys.argv[1:])))
-       f.close()
-       main()
diff --git a/commands/checkpcu.py b/commands/checkpcu.py
deleted file mode 100755 (executable)
index 4524cd0..0000000
+++ /dev/null
@@ -1,61 +0,0 @@
-#!/usr/bin/python
-
-import time
-import sys
-import os
-
-from monitor.wrapper import plc
-
-def argv_to_dict(argv):
-       """
-               NOTE: very bare-bones, no error checking, will fail easily.
-       """
-       d = {}
-       prev=None
-       for a in argv:
-               if "--" == a[0:2]:
-                       prev = a[2:]
-               elif "-" == a[0:1]:
-                       prev = a[1:]
-               else:
-                       d[prev] = a
-       return d
-
-def main():
-       d = argv_to_dict(sys.argv[1:])
-
-       api = plc.api
-       if 'hostname' in d or 'H' in d:
-               try:
-                       hostname = d['host']
-               except:
-                       hostname = d['H']
-       else:
-               print "UNKNOWN: argument error"
-               sys.exit(3)
-
-       try:
-               n = api.GetNodes(hostname)[0]
-       except:
-               print "UNKNOWN: API failure"
-               sys.exit(3)
-
-       t1 = 0
-       t2 = time.time()
-
-       if True:
-               print "FAKE-OK: PCU test successful"
-               sys.exit(0)
-       elif False:
-               print "FAKE-WARNING: PCU configuration incomplete"
-               sys.exit(1)
-       else:
-               print "FAKE-CRITICAL: PCU test failed"
-               sys.exit(2)
-
-
-if __name__ == '__main__':
-       f = open("/tmp/checkpcu", 'a')
-       f.write("checkpcu %s %s\n" % (time.time(), " ".join(sys.argv[1:])))
-       f.close()
-       main()
diff --git a/commands/escalation.py b/commands/escalation.py
deleted file mode 100755 (executable)
index c4979b6..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/usr/bin/python
-
-import time
-import sys
-
-
-if __name__ == '__main__':
-       f = open("/tmp/escalation", 'a')
-       f.write("escalation %s %s\n" % (time.time(), " ".join(sys.argv[1:])))
-       f.close()
diff --git a/commands/mail.py b/commands/mail.py
deleted file mode 100755 (executable)
index 84d8217..0000000
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/usr/bin/python
-
-import time
-import sys
-import os
-
-
-def argv_to_dict(argv):
-       """
-               NOTE: very bare-bones, no error checking, will fail easily.
-       """
-       d = {}
-       prev=None
-       for a in argv:
-               if "--" in a:
-                       prev = a[2:]
-               else:
-                       d[prev] = a
-       return d
-
-if __name__ == '__main__':
-       f = open("/tmp/myopsmail", 'a')
-       f.write("mail %s %s\n" % (time.time(), " ".join(sys.argv[1:])))
-       f.close()
-
-       d = argv_to_dict(sys.argv[1:])
-       command_line="""/usr/bin/printf "%%b" "***** MyOpsNagios %(hostnotificationnumber)s *****\\n\\nNotification Type: %(notificationtype)s\\nHost: %(hostname)s\\nState: %(hoststate)s\\nAddress: %(hostaddress)s\\nInfo: %(hostoutput)s\\n\\nDate/Time: %(longdatetime)s\\n" | /bin/mail -S replyto=monitor@planet-lab.org -s "** %(notificationtype)s Host Alert: %(hostname)s is %(hoststate)s **" %(contactemail)s""" % d
-       os.system(command_line)
-
-
diff --git a/commands/repair.py b/commands/repair.py
deleted file mode 100755 (executable)
index 0706b02..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/usr/bin/python
-
-import time
-import sys
-import os
-
-if __name__ == '__main__':
-       f = open("/tmp/repair", 'a')
-       f.write("repair %s %s\n" % (time.time(), " ".join(sys.argv[1:])))
-       f.close()
index 7baeafd..c0008a6 100755 (executable)
@@ -2,16 +2,16 @@
 from nagiosobjects import *
 
 command_auto = Command(command_name="check_mode",
-                                          command_line="""/usr/share/monitor/commands/checkmode.py -H $HOSTNAME$ --sn $SERVICENOTIFICATIONNUMBER$ """)
+                                          command_line="""/usr/share/monitor/nagios/plugins/checkmode.py -H $HOSTNAME$ --sn $SERVICENOTIFICATIONNUMBER$ """)
 print command_auto.toString()
 
 command_auto = Command(command_name="check_pcu",
-                                          command_line="""/usr/share/monitor/commands/checkpcu.py -H $HOSTNAME$ """)
+                                          command_line="""/usr/share/monitor/nagios/plugins/checkpcu.py -H $HOSTNAME$ """)
 print command_auto.toString()
 
 
 command_auto = Command(command_name="automate-policy-escalation-command",
-                                          command_line="""/usr/share/monitor/commands/escalation.py $HOSTNAME$ $HOSTNOTIFICATIONNUMBER$ $HOSTDURATIONSEC$ $NOTIFICATIONTYPE$ """)
+                                          command_line="""/usr/share/monitor/nagios/actions/escalation.py $HOSTNAME$ $HOSTNOTIFICATIONNUMBER$ $HOSTDURATIONSEC$ $NOTIFICATIONTYPE$ """)
 contact_auto = Contact(contact_name="automate-policy-escalation-contact",
                                                host_notifications_enabled=1,
                                                service_notifications_enabled=0,
@@ -27,7 +27,7 @@ print contact_auto.toString()
 
 
 command_auto = Command(command_name="automate-service-repair-command",
-                                          command_line="""/usr/share/monitor/commands/repair.py $SERVICENOTIFICATIONNUMBER$ $HOSTNOTIFICATIONNUMBER$ $NOTIFICATIONTYPE$ $HOSTNAME$ $SERVICEDESC$""")
+                                          command_line="""/usr/share/monitor/nagios/actions/repair.py $SERVICENOTIFICATIONNUMBER$ $HOSTNOTIFICATIONNUMBER$ $NOTIFICATIONTYPE$ $HOSTNAME$ $SERVICEDESC$""")
 
 contact_auto = Contact(contact_name="automate-service-repair-contact",
                                                host_notifications_enabled=1,
@@ -53,7 +53,7 @@ print command_cluster.toString()
 
 
 command_auto = Command(command_name="automate-host-reboot-command",
-                                          command_line="""/usr/share/monitor/commands/reboot.py $NOTIFICATIONTYPE$ $HOSTNAME$""")
+                                          command_line="""/usr/share/monitor/nagios/actions/reboot.py $NOTIFICATIONTYPE$ $HOSTNAME$""")
 
 contact_auto = Contact(contact_name="automate-host-reboot-contact",
                                                host_notifications_enabled=1,
index 114dcf0..4771578 100755 (executable)
@@ -40,7 +40,7 @@ def getContactsAndContactGroupsFor(lb, type, email_list):
 
 
 host_email_command = Command(command_name="monitor-notify-host-by-email",
-                                                command_line="""/usr/share/monitor/commands/mail.py --hostnotificationnumber $HOSTNOTIFICATIONNUMBER$ --notificationtype $NOTIFICATIONTYPE$ --hostname $HOSTNAME$ --hoststate $HOSTSTATE$ --hostaddress $HOSTADDRESS$ --hostoutput "$HOSTOUTPUT$" --longdatetime "$LONGDATETIME$" --notificationitype $NOTIFICATIONTYPE$ --contactemail $CONTACTEMAIL$""")
+                                                command_line="""/usr/share/monitor/nagios/actions/mail.py --hostnotificationnumber $HOSTNOTIFICATIONNUMBER$ --notificationtype $NOTIFICATIONTYPE$ --hostname $HOSTNAME$ --hoststate $HOSTSTATE$ --hostaddress $HOSTADDRESS$ --hostoutput "$HOSTOUTPUT$" --longdatetime "$LONGDATETIME$" --notificationitype $NOTIFICATIONTYPE$ --contactemail $CONTACTEMAIL$""")
 
 service_email_command = Command(command_name="monitor-notify-service-by-email",
                                                        command_line="""/usr/bin/printf "%b" "***** MyOpsNagios $HOSTNOTIFICATIONNUMBER$ *****\\n\\nNotification Type: $NOTIFICATIONTYPE$\\n\\nService: $SERVICEDESC$\\nHost: $HOSTALIAS$\\nAddress: $HOSTADDRESS$\\nState: $SERVICESTATE$\\n\\nDate/Time: $LONGDATETIME$\\n\\nAdditional Info:\\n\\n$SERVICEOUTPUT$" | /bin/mail -S replyto=monitor@planet-lab.org -s "** $NOTIFICATIONTYPE$ Service Alert: $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$ **" $CONTACTEMAIL$""")
index a9744ee..d3dbde9 100644 (file)
--- a/setup.py
+++ b/setup.py
@@ -22,24 +22,24 @@ setup(name='MonitorModule',
       url='http://www.planet-lab.org',
       packages=packages)
 
-packages=['pcucontrol', 
-          'pcucontrol.util',
-          'pcucontrol.transports',
-          'pcucontrol.transports.ssh',
-          'pcucontrol.transports.pyssh',
-          'pcucontrol.models',
-          'pcucontrol.models.hpilo',
-          'pcucontrol.models.hpilo.iloxml',
-          'pcucontrol.models.intelamt',
-          'pcucontrol.models.intelamt']
-
-# TODO: add data dir for intelamt and hpilo stuff
-print packages
-setup(name='PCUControlModule',
-      version=pcucontrol_version,
-      description='PCU Control Module',
-      author='Stephen Soltesz',
-      author_email='soltesz@cs.princeton.edu',
-      url='http://www.planet-lab.org',
-      packages=packages)
+#packages=['pcucontrol', 
+#          'pcucontrol.util',
+#          'pcucontrol.transports',
+#          'pcucontrol.transports.ssh',
+#          'pcucontrol.transports.pyssh',
+#          'pcucontrol.models',
+#          'pcucontrol.models.hpilo',
+#          'pcucontrol.models.hpilo.iloxml',
+#          'pcucontrol.models.intelamt',
+#          'pcucontrol.models.intelamt']
+#
+## TODO: add data dir for intelamt and hpilo stuff
+#print packages
+#setup(name='PCUControlModule',
+#      version=pcucontrol_version,
+#      description='PCU Control Module',
+#      author='Stephen Soltesz',
+#      author_email='soltesz@cs.princeton.edu',
+#      url='http://www.planet-lab.org',
+#      packages=packages)
 
diff --git a/tools/nagiosobjects.py b/tools/nagiosobjects.py
deleted file mode 100644 (file)
index 332fb40..0000000
+++ /dev/null
@@ -1,60 +0,0 @@
-
-class NagiosObject(object):
-       trans = {'d2_coords': '2d_coords'}
-
-       def __init__(self, id, **kwargs):
-               self.id = id
-               self.kwords = kwargs.keys()
-               for key in self.kwords:
-                       self.__setattr__(key, kwargs[key])
-
-       def toString(self):
-               ret = ""
-               ret += "define %s {\n" % self.id
-               for key in self.kwords:
-                       if key in self.trans:
-                               ret += "    %s   %s\n" % (self.trans[key], self.__getattribute__(key))
-                       else:
-                               ret += "    %s   %s\n" % (key, self.__getattribute__(key))
-               ret += "}\n"
-               return ret
-
-class Command(NagiosObject):
-       def __init__(self, **kwargs):   
-               NagiosObject.__init__(self, "command", **kwargs)
-
-class Host(NagiosObject):
-       def __init__(self, **kwargs):   
-               NagiosObject.__init__(self, "host", **kwargs)
-
-class HostGroup(NagiosObject):
-       def __init__(self, **kwargs):   
-               NagiosObject.__init__(self, "hostgroup", **kwargs)
-
-class HostEscalation(NagiosObject):
-       def __init__(self, **kwargs):   
-               NagiosObject.__init__(self, "hostescalation", **kwargs)
-
-class Contact(NagiosObject):
-       def __init__(self, **kwargs):   
-               NagiosObject.__init__(self, "contact", **kwargs)
-
-class ContactGroup(NagiosObject):
-       def __init__(self, **kwargs):   
-               NagiosObject.__init__(self, "contactgroup", **kwargs)
-
-class Service(NagiosObject):
-       def __init__(self, **kwargs):   
-               NagiosObject.__init__(self, "service", **kwargs)
-
-class ServiceDependency(NagiosObject):
-       def __init__(self, **kwargs):   
-               NagiosObject.__init__(self, "servicedependency", **kwargs)
-
-class ServiceEscalation(NagiosObject):
-       def __init__(self, **kwargs):   
-               NagiosObject.__init__(self, "serviceescalation", **kwargs)
-
-class ServiceGroup(NagiosObject):
-       def __init__(self, **kwargs):   
-               NagiosObject.__init__(self, "servicegroup", **kwargs)
diff --git a/tools/plc_hosts_to_nagios.py b/tools/plc_hosts_to_nagios.py
deleted file mode 100755 (executable)
index 7baeafd..0000000
+++ /dev/null
@@ -1,330 +0,0 @@
-#!/usr/bin/python
-from nagiosobjects import *
-
-command_auto = Command(command_name="check_mode",
-                                          command_line="""/usr/share/monitor/commands/checkmode.py -H $HOSTNAME$ --sn $SERVICENOTIFICATIONNUMBER$ """)
-print command_auto.toString()
-
-command_auto = Command(command_name="check_pcu",
-                                          command_line="""/usr/share/monitor/commands/checkpcu.py -H $HOSTNAME$ """)
-print command_auto.toString()
-
-
-command_auto = Command(command_name="automate-policy-escalation-command",
-                                          command_line="""/usr/share/monitor/commands/escalation.py $HOSTNAME$ $HOSTNOTIFICATIONNUMBER$ $HOSTDURATIONSEC$ $NOTIFICATIONTYPE$ """)
-contact_auto = Contact(contact_name="automate-policy-escalation-contact",
-                                               host_notifications_enabled=1,
-                                               service_notifications_enabled=0,
-                                               host_notification_period="24x7",
-                                               host_notification_options="d,r",
-                                               host_notification_commands="automate-policy-escalation-command",
-                                               service_notification_period="24x7",
-                                               service_notification_options="c,w,r",
-                                               service_notification_commands="monitor-notify-service-by-email",
-                                               email="not.an.email")
-print command_auto.toString()
-print contact_auto.toString()
-
-
-command_auto = Command(command_name="automate-service-repair-command",
-                                          command_line="""/usr/share/monitor/commands/repair.py $SERVICENOTIFICATIONNUMBER$ $HOSTNOTIFICATIONNUMBER$ $NOTIFICATIONTYPE$ $HOSTNAME$ $SERVICEDESC$""")
-
-contact_auto = Contact(contact_name="automate-service-repair-contact",
-                                               host_notifications_enabled=1,
-                                               service_notifications_enabled=1,
-                                               host_notification_period="24x7",
-                                               host_notification_options="d,r",
-                                               host_notification_commands="monitor-notify-host-by-email",
-                                               service_notification_period="24x7",
-                                               service_notification_options="c,w,r",
-                                               service_notification_commands="automate-service-repair-command",
-                                               email="not.an.email")
-
-print command_auto.toString()
-print contact_auto.toString()
-
-command_cluster = Command(command_name="check_service_cluster",
-                                        command_line="$USER1$/check_cluster --service -l $ARG1$ -w $ARG2$ -c $ARG3$ -d $ARG4$")
-print command_cluster.toString()
-
-command_cluster = Command(command_name="check_cluster",
-                                        command_line="$USER1$/check_cluster --host -l $ARG1$ -w $ARG2$ -c $ARG3$ -d $ARG4$")
-print command_cluster.toString()
-
-
-command_auto = Command(command_name="automate-host-reboot-command",
-                                          command_line="""/usr/share/monitor/commands/reboot.py $NOTIFICATIONTYPE$ $HOSTNAME$""")
-
-contact_auto = Contact(contact_name="automate-host-reboot-contact",
-                                               host_notifications_enabled=1,
-                                               service_notifications_enabled=0,
-                                               host_notification_period="24x7",
-                                               host_notification_options="d,r",
-                                               host_notification_commands="automate-host-reboot-command",
-                                               service_notification_period="24x7",
-                                               service_notification_commands="monitor-notify-service-by-email",
-                                               email="not.an.email")
-
-print command_auto.toString()
-print contact_auto.toString()
-
-globalservices = []
-for service in [('NET', "Network Services"),
-                               ('SSH', "SSH Service"),
-                               #('SSH806', "Auxiliary SSH Service"),
-                               ('MODE', "PLC Node Mode"),
-                               ('PCU', "PLC PCU status"),
-                               #('HTTP', "PlanetFlow HTTP"),
-                               #('COTOP', "HTTP based COTOP"),
-                               ]:
-                               #('PLSOFT', "PlanetLab Software"),
-                               #('MGMT',  "Remote Management")]:
-       globalservices.append(ServiceGroup(servicegroup_name=service[0], alias=service[1]))
-
-
-# NOTE: since ping is not a reliable check in the wide area, use 'check_ssh'
-#              to determine if the host is minimally online.  If we cannot access
-#              port 22 on it, then it is DOWN.
-
-globalhost = [Host(    name="planetlab-host",
-                                       use="generic-host",
-                                       check_period="24x7",
-                                       check_interval="120",
-                                       retry_interval="10",
-                                       max_check_attempts="6",
-                                       check_command="check_ssh!-t 120",
-                                       first_notification_delay=0, # 60*24*.5, # wait half a day before taking any action
-                                       #contact_groups="admins",
-                                       register="0"),
-                         Service(name="planetlab-service",
-                                       active_checks_enabled="1",
-                                       passive_checks_enabled="1",
-                                       parallelize_check="1",
-                                       obsess_over_service="1",
-                                       check_freshness="0",
-                                       notifications_enabled="0",
-                                       event_handler_enabled="1",
-                                       flap_detection_enabled="1",
-                                       failure_prediction_enabled="1",
-                                       process_perf_data="1",
-                                       retain_status_information="1",
-                                       retain_nonstatus_information="1",
-                                       is_volatile="0",
-                                       check_period="24x7",
-                                       max_check_attempts="3",
-                                       normal_check_interval="30",     # NOTE: make this reasonable for N machines.
-                                       retry_check_interval="5",
-                                       notification_options="w,u,c,r",
-                                       notification_interval="60",
-                                       notification_period="24x7",
-                                       register="0")
-                       ]
-
-for obj in globalhost + globalservices:
-       print obj.toString()
-
-from monitor.wrapper import plc
-from monitor.generic import *
-
-l_sites = plc.api.GetSites({'login_base' : ['asu', 'gmu', 'gt']})
-#l_sites = plc.api.GetSites([10243, 22, 10247, 138, 139, 10050, 10257, 18, 20, 
-#                                                      21, 10134, 24, 10138, 10141, 30, 31, 33, 10279, 41, 29, 10193, 10064, 81,
-#                                                      10194, 10067, 87, 10208, 10001, 233, 157, 10100, 10107])
-
-node_ids = [ s['node_ids'] for s in l_sites ]
-node_ids = [ map(str,n) for n in node_ids ] 
-node_ids = [ ",".join(n) for n in node_ids ] 
-node_ids = ",".join(node_ids)
-node_ids = map(int, node_ids.split(","))
-
-l_nodes = plc.api.GetNodes(node_ids)
-
-(d_sites,id2lb) = dsites_from_lsites_id(l_sites)
-(plcdb, hn2lb, lb2hn) = dsn_from_dsln(d_sites, id2lb, l_nodes)
-
-netid2ip = d_from_l(plc.api.GetInterfaces(), 'interface_id')
-
-ServiceDependency
-hg = HostGroup(hostgroup_name="allsites", alias="allsites")
-print hg.toString()
-
-for site in l_sites:
-       shortname = site['abbreviated_name']
-       lb = site['login_base']
-       hg = HostGroup(hostgroup_name=lb, alias=shortname)
-       lat = site['latitude']
-       lon = site['longitude']
-       lon_x = -1
-       lat_y = -1
-       if lat is not None and lon is not None:
-               scale = 5
-               lon_x = int(180 + lon) * scale
-               lat_y = int(180 - (lat + 90)) * scale
-
-       if site['login_base'] in lb2hn:
-               nodes = lb2hn[site['login_base']]
-       else:
-               continue
-
-       if len(nodes) == 0:
-               continue
-
-       #print hg.toString()
-
-
-       hostname_list = []
-       for node in nodes:
-               hn = node['hostname']
-               if len(node['interface_ids']) == 0:
-                       continue
-
-               ip = netid2ip[str(node['interface_ids'][0])]['ip']
-
-               if lon_x is not -1 and lat_y is not -1:
-                       coords="%s,%s" % (lon_x, lat_y)
-               else:
-                       coords="0,0"
-                       
-               h = Host(use="planetlab-host",
-                               host_name="%s" % hn,
-                               alias=hn,
-                               address=ip,
-                               d2_coords=coords,
-                               statusmap_image="icon-system.png",
-                               )
-                               #hostgroups=lb)
-
-               print h.toString()
-
-               hostname_list.append(hn)
-       
-       # NOTE: use all hostnames at site to create HostEscalations for down-notices
-       if len(hostname_list) > 0:
-
-               hn_list = ",".join(hostname_list)
-
-
-               # NOTE: this encodes 2 OK nodes as the threshold.
-               c=len(hostname_list)-1
-               w=len(hostname_list)-2
-               hs = ",".join([ "$HOSTSTATEID:%s$" % h for h in hostname_list ])
-               ss = ",".join([ "$SERVICESTATEID:%s:aSSH$" % h for h in hostname_list ])
-
-               dummy_site_host = Host(host_name="site-cluster-for-%s" % lb,
-                                               use="generic-host",
-                                               alias="site-%s" % lb,
-                                               address="1.1.1.1",
-                                               check_command="""check_cluster!"site-%s"!%s!%s!%s""" % (lb, w, c, hs),
-
-                                               check_period="24x7",
-                                               check_interval="120",
-                                               retry_interval="1",
-                                               max_check_attempts="1",
-                                               first_notification_delay=0, # 60*24*.5, # wait half a day before taking any action
-
-                                               hostgroups="allsites")
-
-               # NOTE: without a dummy site service that checks basically the same
-               #               thing, there is nothing to display for the service-status-details
-               #               page for 'allsites'
-               print dummy_site_host.toString()
-               dummy_site_service = Service(use="planetlab-service",
-                                                       host_name="site-cluster-for-%s" % lb,
-                                                       service_description="LoginSSH",
-                                                       display_name="LoginSSH",
-                                                       notifications_enabled="0",
-                                                       check_command="""check_service_cluster!"site-%s"!%s!%s!%s""" % (lb, w, c, ss))
-               print dummy_site_service.toString()
-
-
-               # NOTE: before sending any notices, attempt to reboot host twice
-               he_reboot = HostEscalation(host_name=hn_list,
-                                               first_notification=1,
-                                               last_notification=2,
-                                               notification_interval=20, # 24*60*.25,
-                                               escalation_options="d",
-                                               contacts="automate-host-reboot-contact")
-               print he_reboot.toString()
-
-               # NOTE: as long as the site-cluster is down, run the escalation
-               he_escalate = HostEscalation(host_name="site-cluster-for-%s" % lb,
-                                               first_notification=1,
-                                               last_notification=0,
-                                               notification_interval=20, # 24*60*.25,
-                                               escalation_options="d,r",
-                                               contacts="automate-policy-escalation-contact",)
-               print he_escalate.toString()
-
-               # NOTE: always send notices to techs
-               he1 = HostEscalation( host_name="site-cluster-for-%s" % lb,
-                                               first_notification=1,
-                                               last_notification=0,
-                                               notification_interval=40, # 24*60*.5,
-                                               escalation_options="r,d",
-                                               contact_groups="%s-techs" % lb)
-
-               # NOTE: only send notices to PIs after a week. (2 prior notices) 
-               he2 = HostEscalation( host_name="site-cluster-for-%s" % lb,
-                                               first_notification=4,
-                                               last_notification=0,
-                                               notification_interval=40, # 24*60*.5,
-                                               escalation_options="r,d",
-                                               contact_groups="%s-pis" % lb)
-
-               # NOTE: send notices to Slice users after two weeks. (4 prior notices) 
-               he3 = HostEscalation( host_name="site-cluster-for-%s" % lb,
-                                               first_notification=7,
-                                               last_notification=0,
-                                               notification_interval=40, # 24*60*.5,
-                                               escalation_options="r,d",
-                                               contact_groups="%s-sliceusers" % lb)
-
-               for he in [he1, he2, he3]:
-                       print he.toString()
-
-               s1 = Service(use="planetlab-service",
-                                       host_name=hn_list,
-                                       service_description="aSSH",
-                                       display_name="aSSH",
-                                       servicegroups="NET,SSH",
-                                       check_command="check_ssh!-t 120")
-               s2 = Service(use="planetlab-service",
-                                       host_name=hn_list,
-                                       service_description="bMODE",
-                                       display_name="bMODE",
-                                       servicegroups="NET,MODE",
-                                       notifications_enabled="1",
-                                       check_command="check_mode")
-               s3 = Service(use="planetlab-service",
-                                       host_name=hn_list,
-                                       service_description="cPCU",
-                                       display_name="cPCU",
-                                       servicegroups="NET,PCU",
-                                       notifications_enabled="0",
-                                       check_command="check_pcu")
-               #s4 = Service(use="planetlab-service",
-               #                       host_name=hn_list,
-               #                       service_description="dCOTOP",
-               #                       display_name="dCOTOP",
-               #                       servicegroups="NET,COTOP",
-               #                       notifications_enabled="0",
-               #                       check_command="check_http!-p 3120 -t 120")
-
-               # NOTE: if the http service is broken, then try to repair the node.
-               # TODO: how to check that this only triggers if aSSH is ok?
-               se1 = ServiceEscalation(host_name=hn_list,
-                                                               service_description="bMODE",
-                                                               first_notification=1,
-                                                               last_notification=0,
-                                                               escalation_options="w,c,r",
-                                                               notification_interval=20,
-                                                               contacts="automate-service-repair-contact")
-
-               #sd1 = ServiceDependency(host_name=hn_list,
-               #                                               service_description="aSSH",
-               #                                               dependent_service_description="bSSH806,cHTTP,dCOTOP",
-               #                                               execution_failure_criteria="w,u,c,p",)
-
-               for service in [s1,s2,s3,se1]:
-                       print service.toString()
-
diff --git a/tools/plc_users_to_nagios.py b/tools/plc_users_to_nagios.py
deleted file mode 100755 (executable)
index 114dcf0..0000000
+++ /dev/null
@@ -1,76 +0,0 @@
-#!/usr/bin/python
-
-from nagiosobjects import *
-
-def getContactsAndContactGroupsFor(lb, type, email_list):
-
-       if len(email_list) == 0:
-               cg1 = ContactGroup(contactgroup_name="%s-%s" % (lb,type),
-                                               alias="%s-%s" % (lb,type))
-                                               
-               return [cg1]
-
-       contact_list = []
-       person_list = []
-       count = 0
-       for person in email_list:
-               # TODO: for testing!
-               person="soltesz+%s%s%s@cs.princeton.edu" % ( lb, type, count )
-               c1 = Contact(contact_name=person.replace("+", ""),
-                                               host_notifications_enabled=1,
-                                               service_notifications_enabled=1,
-                                               host_notification_period="24x7",
-                                               service_notification_period="24x7",
-                                               host_notification_options="d,r,s",
-                                               service_notification_options="c,r",
-                                               host_notification_commands="monitor-notify-host-by-email",
-                                               service_notification_commands="monitor-notify-service-by-email",
-                                               email=person)
-               count += 1
-               contact_list.append(c1)
-               person_list.append(person.replace("+",""))
-
-       cg1 = ContactGroup(contactgroup_name="%s-%s" % (lb,type),
-                                               alias="%s-%s" % (lb,type),
-                                               members=",".join(person_list))
-
-       contact_list.append(cg1)
-
-       return contact_list
-
-
-host_email_command = Command(command_name="monitor-notify-host-by-email",
-                                                command_line="""/usr/share/monitor/commands/mail.py --hostnotificationnumber $HOSTNOTIFICATIONNUMBER$ --notificationtype $NOTIFICATIONTYPE$ --hostname $HOSTNAME$ --hoststate $HOSTSTATE$ --hostaddress $HOSTADDRESS$ --hostoutput "$HOSTOUTPUT$" --longdatetime "$LONGDATETIME$" --notificationitype $NOTIFICATIONTYPE$ --contactemail $CONTACTEMAIL$""")
-
-service_email_command = Command(command_name="monitor-notify-service-by-email",
-                                                       command_line="""/usr/bin/printf "%b" "***** MyOpsNagios $HOSTNOTIFICATIONNUMBER$ *****\\n\\nNotification Type: $NOTIFICATIONTYPE$\\n\\nService: $SERVICEDESC$\\nHost: $HOSTALIAS$\\nAddress: $HOSTADDRESS$\\nState: $SERVICESTATE$\\n\\nDate/Time: $LONGDATETIME$\\n\\nAdditional Info:\\n\\n$SERVICEOUTPUT$" | /bin/mail -S replyto=monitor@planet-lab.org -s "** $NOTIFICATIONTYPE$ Service Alert: $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$ **" $CONTACTEMAIL$""")
-
-
-print host_email_command.toString()
-print service_email_command.toString()
-
-
-from monitor.wrapper import plc
-from monitor.generic import *
-
-
-l_sites = plc.api.GetSites({'login_base' : ['asu', 'gmu', 'gt']})
-#l_sites = plc.api.GetSites([10243, 22, 10247, 138, 139, 10050, 10257, 18, 20, 
-#                                                      21, 10134, 24, 10138, 10141, 30, 31, 33, 10279, 41, 29, 10193, 10064, 81,
-#                                                      10194, 10067, 87, 10208, 10001, 233, 157, 10100, 10107])
-
-
-for site in l_sites:
-       shortname = site['abbreviated_name']
-       lb = site['login_base']
-
-       # NOTE: do duplcate groups create duplicate emails?
-       cl1 = getContactsAndContactGroupsFor(lb, "techs", plc.getTechEmails(lb))
-       cl2 = getContactsAndContactGroupsFor(lb, "pis", plc.getPIEmails(lb))
-       # NOTE: slice users will change often.
-       cl3 = getContactsAndContactGroupsFor(lb, "sliceusers", plc.getSliceUserEmails(lb))
-
-       for c in [cl1,cl2,cl3]:
-               for i in c:
-                       print i.toString()
-