import repository from arizona
[raven.git] / 2.0 / python / storkplushd
1 #! /usr/bin/env python
2
3 # Jude Nelson
4 # Parts copied from pacmand
5 #
6 # Synchronizes repository metafile
7 #
8 #           [option, long option,                     variable,                      action,       data,     default,                       metavar,                       description]
9 """arizonaconfig
10     options=[
11
12             ["-C",   "--configfile",  "configfile",  "store",       "string", "/usr/local/stork/etc/stork.conf", "FILE",      "use a different config file (/usr/local/stork/etc/stork.conf is the default)"],
13              ["",    "--push-interval",    "push_interval",    "store",   "int",    10,    None,   "Interval (in seconds) between each metafile push."],
14              ["",    "--check-interval",   "check_interval",   "store",   "int",    5,     None,   "Interval (in seconds) between checking PLuSH process status."],
15              ["",    "--plush-port",      "plush_port",        "store",    "int",   4000,  None,   "Port on which to open PLuSH."],
16             ["", "--sync", "do_sync", "store_true", None, False, None, "do not daemonize"],
17             ["", "--nest-list", "nest_list", "store", "string", "arizona_nest@planetlab1.arizona-gigapop.net:10000", "nest_list", "Comma-separated list of nest slices to which to send metadata, each in the form of slice-name@planetlab-host:portnum"],
18             ["", "--planetlab-user", "planetlab_user", "store", "string", "jnelson@email.arizona.edu", "planetlab_user", "PlanetLab user accont for PLuSH to use to access the nests (NOTE: you need to set this up separately)"],
19             ["", "--metafile-path", "metafile_path", "store", "string", "metafile.tar", "metafile_path", "Path to the tarball containing the metafile to distribute to each Stork nest"],
20             ["", "--allsites-path", "allsites_path", "store", "string", "allsites.xml", "allsites_path", "Path to the allsites.xml PlanetLab site description file for PLuSH"],
21             ["", "--directory-path", "directory_path", "store", "string", "/tmp/.directory.xml", "directory_path", "Path to the location to generate the directory.xml file needed by PLuSH to identify node slices.  THIS WILL BE OVERWRITTEN"],
22             ["", "--helper-scripts-path", "helper_scripts_path", "store", "string", "helper-scripts/", "helper_scripts_path", "Path to the helper-scripts/ directory needed by PLuSH"],
23             ["", "--experiment-path", "experiment_path", "store", "string", "get-update.xml", "experiment_path", "Path to the location to generate the get-update.xml PLuSH experiment file needed to push the metafile to the Stork nest slices.  THIS WILL BE OVERWRITTEN"],
24             ["", "--plush-cwd", "plush_cwd", "store", "string", "/tmp", "plush_cwd", "The top-level working directory for PLuSH.  A directory called \"plush\" will be created within it, to which several PLuSH files will be symlinked"],
25             ["", "--plush-command", "plush_command", "store", "string", "/usr/bin/plush", "plush_command", "The command that executes PLuSH"]
26             ]
27      includes=[]
28 """
29
import sys,os,signal,time
import subprocess
import threading
import thread
import xmlrpclib
from xml.sax.saxutils import escape, quoteattr
#should be in the same directory as the other scripts when used...
#sys.path += ["../python/refactor"]
sys.path += ["/usr/local/stork/bin"]
import arizonaconfig, arizonareport, arizonacomm
import arizonageneral
38
# Built-in fallbacks, applied when the matching option comes back unset.
push_interval = 10       # seconds between metafile pushes
check_interval = 5       # seconds between PLuSH process checks
plush_port = 4000
plush_xmlrpc_port = plush_port + 1 # the PLuSH source code fixes this at plush_port + 1

# File names PLuSH expects to find in the working directory of its executable.
allsite_name = "allsites.xml"
experiment_name = "get-update.xml"
directory_name = "directory.xml"
helper_scripts_name = "helper-scripts"
metafile_name = "metafile.tar"

# PlanetLab account used when none is configured; change this as needed.
planetlab_user = "jnelson@email.arizona.edu"

# Connection status per nest: nest name -> True (connected) / False.
glo_nest_connect_status = dict()
57
58 # setup the files to start PLuSH
def setup_plush( toplevel_dir, plush_path, allsites_path, directory_path, experiment_path, scripts_path ):
   """
   <Purpose>
      Create the PLuSH working directory and symlink into it the PLuSH
      executable and the files PLuSH expects to find next to it.

   <Arguments>
      toplevel_dir:
         Directory to populate; created if it does not exist.
      plush_path:
         Path of the PLuSH executable (linked as "plush").
      allsites_path, directory_path, experiment_path, scripts_path:
         Paths of the allsites file, directory file, experiment file and
         helper-scripts directory (linked under their fixed PLuSH names).

   <Returns>
      True if every link is in place, False otherwise.
   """
   links = [
      ( plush_path,      "plush" ),
      ( allsites_path,   allsite_name ),
      ( directory_path,  directory_name ),
      ( experiment_path, experiment_name ),
      ( scripts_path,    helper_scripts_name ),
   ]

   try:
      # create the directory first; nothing can be linked into it otherwise
      arizonageneral.makedirs_existok( toplevel_dir )

      for (target, link_name) in links:
         link_path = os.path.join( toplevel_dir, link_name )

         # replace a link left behind by a previous run (a real file or
         # directory is left alone, and symlink() below will then fail)
         if os.path.islink( link_path ):
            os.remove( link_path )

         # a relative symlink target is resolved against the directory that
         # holds the link, not against our cwd, so always link absolutely
         os.symlink( os.path.abspath( target ), link_path )

   except (OSError, IOError):
      print( "[storkplushd] setup_plush(): could not populate PLuSH working directory!" )
      print( "[storkplushd] setup_plush(): " + str( sys.exc_info()[1] ) )
      return False

   print( "[storkplushd]: populated PLuSH working directory in " + toplevel_dir )
   return True
81
82
83
84
85 # start the PLuSH controller
def start_plush( plush_dir, plush_command, plush_portnum, nest_list ):
   """
   <Purpose>
      Launch the PLuSH controller in the background and ask it, over
      XML-RPC, to connect to every nest.

   <Arguments>
      plush_dir:
         PLuSH working directory (as populated by setup_plush()).
      plush_command:
         The command that executes PLuSH.
      plush_portnum:
         PLuSH's XML-RPC port, i.e. the PLuSH port + 1.
      nest_list:
         Iterable of nest names to connect to.

   <Side Effects>
      Records the outcome for each nest in glo_nest_connect_status
      (True when connected, False when all attempts failed).

   <Returns>
      None.
   """
   # setup_plush() links the executable into plush_dir as "plush"; prefer
   # that link so PLuSH runs from the directory holding its support files.
   # Otherwise fall back to the command itself (os.path.join keeps an
   # absolute command unchanged and resolves a relative one in plush_dir).
   executable = os.path.join( plush_dir, "plush" )
   if not os.path.exists( executable ):
      executable = os.path.join( plush_dir, plush_command )

   # reap a previously spawned PLuSH that has exited, so it does not linger
   # as a defunct process
   previous = getattr( start_plush, "process", None )
   if previous is not None:
      previous.poll()

   # Start the process without waiting for it to exit.
   # NOTE(review): the port is passed as a single positional argument, the
   # closest well-formed reading of the old command string -- confirm
   # against the PLuSH command-line syntax.
   try:
      start_plush.process = subprocess.Popen( [executable, str( plush_portnum - 1 )], cwd=plush_dir )
   except OSError:
      print( "[storkplushd]: could not start PLuSH: " + str( sys.exc_info()[1] ) )
      return

   print( "[storkplushd]: PLuSH started; giving it some time to discover nest slices..." )

   # give plush some time to start up
   time.sleep(10)

   # open a connection to PLuSH
   plush_server = xmlrpclib.Server("http://localhost:" + str(plush_portnum) + "/")

   # attempt to connect to each slice, a few times each (just in case)
   max_attempts = 3
   for nest in nest_list:
      connected = False

      for attempt in range( max_attempts ):
         if attempt > 0:
            time.sleep(2)

         try:
            connected = ( plush_server.plush.connect( nest ) == 1 )
         except Exception:
            # PLuSH not reachable (yet) or it rejected the call
            connected = False

         if connected:
            break

         if attempt + 1 < max_attempts:
            print( "[storkplushd]: could not connect to host " + nest + ", retrying..." )

      glo_nest_connect_status[nest] = connected
      if not connected:
         print( "[storkplushd]: giving up on host " + nest + "..." )

   print( "[storkplushd]: Initialized PLuSH" )
124
125
126
127
128
129
130 # is PLuSH running?
def is_plush_running( ps_lines=None ):
   """
   <Purpose>
      Report whether a PLuSH process shows up in the process table.

   <Arguments>
      ps_lines:
         Optional list of lines in `ps aux` format to inspect.  When
         omitted, the live output of `ps aux` is used.

   <Returns>
      True if some process other than this daemon has "plush" in its
      command line, False otherwise.
   """
   if ps_lines is None:
      cmd = os.popen("ps aux")
      try:
         ps_lines = cmd.readlines()
      finally:
         cmd.close()

   own_pid = str( os.getpid() )

   for proc in ps_lines:
      # ps aux columns: USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
      fields = proc.split( None, 10 )
      if len( fields ) < 11:
         continue

      (pid, stat, command) = (fields[1], fields[7], fields[10])

      # this daemon is called storkplushd, which itself contains "plush"
      if pid == own_pid or "storkplushd" in command:
         continue

      # a defunct (zombie) entry is not a running PLuSH
      if stat.startswith( "Z" ):
         continue

      if "plush" in command:
         return True

   return False
# end
146
147
148
149
150 # generate a directory file that will store the hostnames and port numbers of each planetlab nest host (to feed into plush)
151 # Returns a string of XML that can be written to a directory.xml file.
def gen_plush_directory( planetlab_account, allsite_filename, nest_list_csv ):
   """
   <Purpose>
      Build the contents of the directory.xml file that tells PLuSH which
      port each nest listens on.

   <Arguments>
      planetlab_account:
         PlanetLab user name written into the <user> element.
      allsite_filename:
         Name of the allsites file written into the <allsites> element.
      nest_list_csv:
         Comma-separated nest entries, each of the form name:port.
         Surrounding whitespace and empty entries are ignored.

   <Exceptions>
      ValueError if an entry has no ":port" part.

   <Returns>
      The XML document as a string.
   """
   lines = [
      "<?xml version=\"1.0\" encoding=\"UTF-8\"?>",
      "<plush>",
      "   <resource_manager type=\"planetlab\">",
      "      <user>" + escape( planetlab_account ) + "</user>",
      "      <allsites>" + escape( allsite_filename ) + "</allsites>",
   ]

   # add one port mapping per nest
   for nest_slice in nest_list_csv.split( "," ):
      nest_slice = nest_slice.strip()
      if not nest_slice:
         # e.g. a trailing comma in the list
         continue

      if ":" not in nest_slice:
         raise ValueError( "nest entry '" + nest_slice + "' is not of the form name:port" )

      # the port is whatever follows the last colon
      (nest_host, nest_port) = nest_slice.rsplit( ":", 1 )
      lines.append( "      <port_map slice=" + quoteattr( nest_host ) + " port=" + quoteattr( nest_port ) + "/>" )

   # add the remaining invariant code
   lines += [
      "   </resource_manager>",
      "   <resource_manager type=\"ssh\">",
      "   </resource_manager>",
      "</plush>",
   ]

   return "\n".join( lines )
178
179
180
181
182
# generate an experiment file that will propagate the metafile to each nest using Plush, given a path to the metafile to deploy and a CSV list of nests.
# returns a string of XML that can be written as a Plush experiment file
def gen_plush_experiment( metafile_path, nest_list_csv ):
   """
   <Purpose>
      Build the contents of the PLuSH experiment file that distributes the
      metafile to the given nests.

   <Arguments>
      metafile_path:
         Path of the metafile tarball, written into the package <path>.
      nest_list_csv:
         Comma-separated nest entries, each of the form name:port.  Only
         the name part is used.  Surrounding whitespace and empty entries
         are ignored.

   <Returns>
      The XML document as a string.
   """
   # the resource group of a nest is its entry without the trailing :port
   nest_groups = []
   for nest_slice in nest_list_csv.split( "," ):
      nest_slice = nest_slice.strip()
      if nest_slice:
         nest_groups.append( nest_slice.rsplit( ":", 1 )[0] )

   lines = [
      "<?xml version=\"1.0\" encoding=\"utf-8\"?>",
      "<plush>",
      "   <project name=\"metadata\">",
      "      <software name=\"get-metadata\" type=\"none\">",
      "         <package name=\"package1\" type=\"tar\">",
      "            <path>" + escape( metafile_path ) + "</path>",
      "         </package>",
      "      </software>",
      "      <component name=\"mynodes\">",
      "         <rspec>",
      "            <num_hosts>" + str( len( nest_groups ) ) + "</num_hosts>",
      "         </rspec>",
      "         <software name=\"get-metadata\" />",
      "         <resources>",
   ]

   # add planetlab slices
   for nest_group in nest_groups:
      lines.append( "            <resource type=\"planetlab\" group=" + quoteattr( nest_group ) + "/>" )

   # add remaining invariant XML code
   lines += [
      "         </resources>",
      "      </component>",
      "      <experiment name=\"download-metadata\">",
      "         <execution>",
      "            <component_block name=\"cb1\">",
      "               <component name=\"mynodes\" />",
      "               <process_block name=\"p2\">",
      "                  <process name=\"echo\">",
      "                     <path>/bin/echo</path>",
      "                     <cmdline>",
      "                        <arg>\"Run success\"</arg>",
      "                     </cmdline>",
      "                     <cwd/>",
      "                  </process>",
      "               </process_block>",
      "            </component_block>",
      "         </execution>",
      "      </experiment>",
      "   </project>",
      "</plush>",
   ]

   return "\n".join( lines ) + "\n"
238
239
240
241
242 # simple method to write a file, given a string (used to create PLuSH XML files)
def make_file( filename, data_str ):
   """
   <Purpose>
      Write data_str to filename, replacing any existing contents (used to
      create the PLuSH XML files).

   <Arguments>
      filename:
         Path of the file to write.
      data_str:
         The text to write.

   <Returns>
      True if the file was written, False if it could not be opened or
      written (a message is printed in that case).
   """
   try:
      out_file = open( filename, "w" )
      try:
         out_file.writelines( data_str )
      finally:
         # close the handle even when the write itself fails
         out_file.close()
   except (IOError, OSError):
      print( "[storkplushd]: could not create file " + filename )
      return False

   return True
252
253
254
255 # regenerate the experiment file (e.g. in case we gain/lose nests)
def refresh_plush_experiment( metafile_path, experiment_path ):
    """
    <Purpose>
       Regenerate the PLuSH experiment file so that it lists exactly the
       nests currently marked connected in glo_nest_connect_status (e.g.
       after gaining or losing nests).

    <Arguments>
       metafile_path:
          Path of the metafile tarball to distribute.
       experiment_path:
          Path of the experiment file to (over)write.

    <Returns>
       True if the experiment file was written, False otherwise.
    """
    # work on a snapshot: other threads update the status table
    active_nests = [ nest for (nest, status) in list( glo_nest_connect_status.items() ) if status ]
    active_nests.sort()

    # join instead of appending "," per nest, which left a trailing comma
    # and thus an empty nest entry
    experiment_xml = gen_plush_experiment( metafile_path, ",".join( active_nests ) )

    if make_file( experiment_path, experiment_xml ) == False:
        return False

    return True
271
272
273
274
275
276
277 # tell PLuSH to push the metafile out every so often by signaling it 
class PLuSH_Handler(threading.Thread):
   """
   <Purpose>
      Background thread that, once per push interval, tells PLuSH over
      XML-RPC to load and run the experiment that pushes the metafile.  If
      PLuSH is not running at that moment it is restarted instead.
   """

   def __init__(self, plush_url, timeout, plush_info, verboseness):
      """
      <Purpose>
         Store the settings and start the thread.

      <Arguments>
         plush_url:
            URL of PLuSH's XML-RPC server.
         timeout:
            Seconds between pushes.
         plush_info:
            Dictionary of PLuSH settings; this class reads the keys
            'plush_dir', 'plush_command', 'plush_portnum', 'nest_list',
            'metafile_path' and 'experiment_path'.
         verboseness:
            Stored but not otherwise used by this class.
      """
      threading.Thread.__init__(self)
      self.verboseness = verboseness
      self.timeout_delta = timeout
      self.timeout = time.time() + timeout
      self.plush_url = plush_url
      self.plush_info = plush_info
      self.start()

   def run(self):
      """
      <Purpose>
         Loop forever, pushing the metafile each time the deadline passes.
      """
      while True:
         # sleep until the deadline instead of spinning on the clock
         remaining = self.timeout - time.time()
         if remaining > 0:
            time.sleep( remaining )
            continue

         self.timeout = time.time() + self.timeout_delta

         try:
            self.push_metafile()
         except Exception:
            # a failed push must not end the thread; try again next interval
            print( "[storkplushd]: metafile push failed: " + str( sys.exc_info()[1] ) )

   def push_metafile(self):
      """
      <Purpose>
         Perform one push: restart PLuSH if it is gone, otherwise load and
         run the experiment.
      """
      info = self.plush_info

      # make sure plush is running
      if not is_plush_running():
         start_plush( info['plush_dir'], info['plush_command'], info['plush_portnum'], info['nest_list'] )
         if refresh_plush_experiment( info['metafile_path'], info['experiment_path'] ) == False:
            print( "[storkplushd]: could not regenerate PLuSH experiment file!" )
         return

      # load and run the experiment
      plush_server = xmlrpclib.Server( self.plush_url )
      plush_server.plush.load( info['experiment_path'] )
      plush_server.plush.run()
#end class
313
314
315
316
317 # check the PLuSH process every so often and re-spawn it as needed
class PLuSH_Poller(threading.Thread):
   """
   <Purpose>
      Background thread that checks the PLuSH process once per check
      interval, re-spawns it when it is gone, and retries the nests that
      are not connected yet.
   """

   def __init__(self, timeout, plush_info, verboseness):
      """
      <Purpose>
         Initialize the thread to begin monitoring the PLuSH process.

      <Arguments>
         timeout:
            Seconds between checks.
         plush_info:
            Dictionary of PLuSH settings; this class reads the keys
            'plush_dir', 'plush_command', 'plush_portnum', 'nest_list',
            'metafile_path' and 'experiment_path'.
         verboseness:
            Stored but not otherwise used by this class.
      """
      threading.Thread.__init__(self)
      self.timeout = time.time() + timeout    # poll every few seconds
      self.timeout_delta = timeout
      self.plush_info = plush_info
      self.verboseness = verboseness

      print( "[storkplushd] will check PLuSH process every " + str(timeout) + " seconds." )
      self.start()

   def run(self):
      """
      <Purpose>
         Check to see if PLuSH is still running every so often.
      """
      while True:
         # sleep until the next check is due, to save CPU
         remaining = self.timeout - time.time()
         if remaining > 0:
            time.sleep( remaining )
            continue

         # new timeout...
         self.timeout = time.time() + self.timeout_delta

         try:
            self.check_plush()
         except Exception:
            # a failed check must not end the thread; try again next interval
            print( "[storkplushd]: PLuSH check failed: " + str( sys.exc_info()[1] ) )

   def check_plush(self):
      """
      <Purpose>
         Perform one check: re-spawn PLuSH if needed, then try to connect
         the nests marked unconnected in glo_nest_connect_status and
         regenerate the experiment file if any of them came back.
      """
      info = self.plush_info

      # if it's not running, then spawn it
      if not is_plush_running():
         start_plush( info['plush_dir'], info['plush_command'], info['plush_portnum'], info['nest_list'] )
         if refresh_plush_experiment( info['metafile_path'], info['experiment_path'] ) == False:
            print( "[storkplushd]: could not refresh PLuSH experiment file!" )

      # are there any nests we should attempt to recontact?
      plush_server = None
      reconnected = False
      for (nest, status) in list( glo_nest_connect_status.items() ):
         if status:
            continue

         if plush_server is None:
            plush_server = xmlrpclib.Server( "http://localhost:" + str( info['plush_portnum'] ) + "/" )

         try:
            result = plush_server.plush.connect( nest )
         except Exception:
            # still unreachable; leave it for the next check
            continue

         if result == 1:
            # record it, so the regenerated experiment includes this nest
            glo_nest_connect_status[nest] = True
            reconnected = True

      # should we regenerate the experiment file?
      if reconnected:
         if refresh_plush_experiment( info['metafile_path'], info['experiment_path'] ) == False:
            print( "[storkplushd]: could not refresh PLuSH experiment file!" )
# end
371
def handler_sighup(signum, frame):
    """
    <Purpose>
       Do-nothing handler for the "hangup" signal.  Installing it means the
       signal is intercepted rather than acted on; its only intended effect
       is to make a pending sleep return.

    <Arguments>
       signum, frame:
          Supplied by the signal machinery; both are ignored.
    """
    return None
379
380
def Main():
    """
    <Purpose>
       Entry point of storkplushd: read the options, write the PLuSH
       directory file, populate the PLuSH working directory, start PLuSH,
       write the experiment file and launch the poller and pusher threads.

    <Side Effects>
       Overwrites the files named by --directory-path and
       --experiment-path.  Becomes a daemon unless --sync was given.
       Exits with status 1 if a setup step fails; otherwise never returns.
    """
    global verbose

    arizonaconfig.init_options("storkplushd",version="2.0", configfile_optvar="configfile")

    loc_push_interval = arizonaconfig.get_option("push_interval")
    loc_check_interval = arizonaconfig.get_option("check_interval")
    loc_plush_port = arizonaconfig.get_option("plush_port")
    loc_allsites_name = arizonaconfig.get_option("allsites_path")
    loc_plush_command = arizonaconfig.get_option("plush_command")

    # Resolve the file locations now, relative to the directory we were
    # started from.  NOTE(review): presumably becoming a daemon changes the
    # working directory -- either way absolute paths are safe to symlink.
    loc_experiment_path = os.path.abspath( arizonaconfig.get_option("experiment_path") )
    loc_directory_path = os.path.abspath( arizonaconfig.get_option("directory_path") )
    loc_scripts_path = os.path.abspath( arizonaconfig.get_option("helper_scripts_path") )
    loc_allsites_path = os.path.abspath( loc_allsites_name )
    loc_metafile_path = os.path.abspath( arizonaconfig.get_option("metafile_path") )
    loc_plush_cwd = os.path.abspath( arizonaconfig.get_option("plush_cwd") )

    do_sync = arizonaconfig.get_option("do_sync")
    verbose = arizonaconfig.get_option("verbose")
    nest_list = arizonaconfig.get_option("nest_list")
    loc_planetlab_user = arizonaconfig.get_option("planetlab_user")

    # fall back to the built-in defaults for anything left unset
    if loc_push_interval is None:
       loc_push_interval = push_interval

    if loc_check_interval is None:
       loc_check_interval = check_interval

    if loc_plush_port is None:
       loc_plush_port = plush_port

    if loc_planetlab_user is None:
       loc_planetlab_user = planetlab_user

    # PLuSH's XML-RPC server listens one port above the PLuSH port
    loc_plush_url = "http://localhost:" + str(loc_plush_port + 1) + "/"

    # generate necessary PLuSH directory.xml file (the allsites name is
    # written exactly as configured)
    directory_xml = gen_plush_directory( loc_planetlab_user, loc_allsites_name, nest_list )

    if make_file( loc_directory_path, directory_xml ) == False:
       sys.exit(1)

    # calculate a list of nests, ignoring empty entries
    nest_array = []
    for nest in nest_list.split(","):
       nest = nest.strip()
       if nest:
          nest_array.append( nest )

    # we haven't connected to anyone yet, so zero out our connection list
    for nest in nest_array:
       glo_nest_connect_status[nest] = False

    # accumulate PLuSH info into a dictionary
    plush_info = {
         'allsites_path':loc_allsites_path,
         'metafile_path':loc_metafile_path,
         'experiment_path':loc_experiment_path,
         'directory_path':loc_directory_path,
         'scripts_path':loc_scripts_path,
         'plush_dir':loc_plush_cwd + "/plush",
         'plush_portnum':loc_plush_port + 1,
         'plush_command':loc_plush_command,
         'nest_list':nest_array,
    }

    # set the hangup signal handler
    signal.signal(signal.SIGHUP, handler_sighup)

    if not do_sync:
       # run as a daemon
       arizonageneral.make_daemon("storkplushd")

    # setup plush files
    if setup_plush( plush_info['plush_dir'], plush_info['plush_command'], plush_info['allsites_path'],
                    plush_info['directory_path'], plush_info['experiment_path'], plush_info['scripts_path'] ) == False:
       sys.exit(1)

    # start PLuSH running!
    start_plush( plush_info['plush_dir'], plush_info['plush_command'], plush_info['plush_portnum'], plush_info['nest_list'] )

    # create our experiment file
    if refresh_plush_experiment( plush_info['metafile_path'], plush_info['experiment_path'] ) == False:
       print( "[storkplushd]: could not create initial PLuSH experiment file!" )
       sys.exit(1)

    # start monitoring plush
    plush_mon = PLuSH_Poller( loc_check_interval, plush_info, 1 )

    # start handling plush
    plush_handler = PLuSH_Handler( loc_plush_url, loc_push_interval, plush_info, 1 )

    # keep the main thread alive, waking at half the shorter interval
    sleep_time = min( loc_check_interval, loc_push_interval )

    while True:
       time.sleep( sleep_time / 2.0 )
488        time.sleep( sleep_time / 2.0 )
489
490
491 if __name__ == "__main__":
492     Main()