first draft for a tracker-less rewrite
Thierry Parmentelat [Thu, 15 Sep 2011 12:27:31 +0000 (14:27 +0200)]
13 files changed:
system/LocalSubstrate.inria.py [new file with mode: 0755]
system/LocalTestResources.sample.inria [deleted file]
system/LocalTestResources.sample.princeton [deleted file]
system/Substrate.py [new file with mode: 0644]
system/TestMain.py
system/TestMapper.py
system/TestNode.py
system/TestPlc.py
system/TestPool.py [deleted file]
system/TestResources.py [deleted file]
system/TestSsh.py
system/Trackers.py [deleted file]
system/config_default.py

diff --git a/system/LocalSubstrate.inria.py b/system/LocalSubstrate.inria.py
new file mode 100755 (executable)
index 0000000..465d8b2
--- /dev/null
@@ -0,0 +1,70 @@
+#!/usr/bin/env python
+# Thierry Parmentelat <thierry.parmentelat@inria.fr>
+# Copyright (C) 2010 INRIA 
+#
+# this is only an example file
+# the actual file is installed in your testmaster box as /root/LocalTestResources.py
+# 
+
+if __name__ == '__main__':
+   import sys, os.path
+   sys.path.append(os.path.expanduser("~/git-tests/system"))
+
+from Substrate import Substrate
+
+# domain name .pl.sophia.inria.fr is implicit on our network
+class OnelabSubstrate (Substrate):
+
+   # the build boxes we use 
+   def build_boxes_spec (self):
+      return [ 'liquid', 'reed', 'velvet', ]
+
+   # the vs-capable box for PLCs
+   def plc_boxes_spec (self):
+      return [ ('vs64-1', 10),  # how many plcs max in this box
+               ]  
+
+   # vplc01 to 15
+   def vplc_ips (self):
+      return [  ( 'vplc%02d'%i,                 # DNS name
+#                  '02:34:56:00:ee:%02d'%i)     # MAC address 
+                  'unused')                     # MAC address 
+                for i in range(1,5) ] # 21
+
+   def qemu_boxes_spec (self):
+      return [
+#         ('kvm64-1', 3), # how many plcs max in this box
+         ('kvm64-2', 3),
+#         ('kvm64-3', 3),
+#         ('kvm64-4', 3),
+#         ('kvm64-5', 3),
+#         ('kvm64-6', 3),
+         ]
+
+   # the nodes pool has a MAC address as user-data (3rd elt in tuple)
+   def vnode_ips (self):
+      return [ ( 'vnode%02d'%i,                 # DNS name               
+                 '02:34:56:00:00:%02d'%i)       # MAC address
+               for i in range(1,5) ] # 21
+   
+   # local network settings
+   def domain (self):
+      return 'pl.sophia.inria.fr'
+
+   def network_settings (self):
+      return { 'interface_fields:gateway':'138.96.112.250',
+               'interface_fields:network':'138.96.112.0',
+               'interface_fields:broadcast':'138.96.119.255',
+               'interface_fields:netmask':'255.255.248.0',
+               'interface_fields:dns1': '138.96.112.1',
+               'interface_fields:dns2': '138.96.112.2',
+               }
+
+# the hostname for the testmaster - in case we'd like to run this remotely
+   def testmaster (self): 
+      return 'testmaster'
+
+local_substrate = OnelabSubstrate ()
+
+if __name__ == '__main__':
+   local_substrate.main()
diff --git a/system/LocalTestResources.sample.inria b/system/LocalTestResources.sample.inria
deleted file mode 100755 (executable)
index 8eaad0f..0000000
+++ /dev/null
@@ -1,67 +0,0 @@
-#!/usr/bin/python
-# Thierry Parmentelat <thierry.parmentelat@inria.fr>
-# Copyright (C) 2010 INRIA 
-#
-# this is only an example file
-# the actual file is installed in your testmaster box as /root/LocalTestResources.py
-# 
-
-if __name__ == '__main__':
-   import sys, os.path
-   sys.path.append(os.path.expanduser("~/git-tests/system"))
-
-from TestResources import TestResources
-
-class OnelabTestResources (TestResources):
-    
-    # we use only one for now but who knows
-    def plc_boxes (self):
-        return [ 'testplc.pl.sophia.inria.fr' ]
-
-    def network_dict (self):
-        return { 'interface_fields:gateway':'138.96.112.250',
-                 'interface_fields:network':'138.96.112.0',
-                 'interface_fields:broadcast':'138.96.119.255',
-                 'interface_fields:netmask':'255.255.248.0',
-                 'interface_fields:dns1': '138.96.112.1',
-                 'interface_fields:dns2': '138.96.112.2',
-                 }
-
-    def nodes_ip_pool (self):
-        return [ ( 'vnode%02d.pl.sophia.inria.fr'%i, 
-                   '138.96.112.%d'%(110+i), 
-                   '02:34:56:00:00:%02d'%i) for i in range(1,8) ]
-    
-    # 32bits : qemu32-[1-5] + 64bits : qemu64-[1-3]
-    # removing qemu64-3 until it gets on the right IP segment again
-    def qemus_ip_pool (self):
-        return [
-            ('kvm64-1.pl.sophia.inria.fr', None, None),
-            ('kvm64-2.pl.sophia.inria.fr', None, None),
-            ('kvm64-3.pl.sophia.inria.fr', None, None),
-            ('kvm64-4.pl.sophia.inria.fr', None, None),
-            ('kvm64-5.pl.sophia.inria.fr', None, None),
-            ('kvm64-6.pl.sophia.inria.fr', None, None),
-            ]
-
-    def max_qemus (self):
-       # let's be tight; nighlty builds.sh wipes it clean beforehand
-        return len(self.qemus_ip_pool())
-
-    # 1 to 15
-    def plcs_ip_pool (self):
-        return [  ( 'vplc%02d.pl.sophia.inria.fr'%i,
-                    '138.96.112.%d'%(70+i),
-                    '02:34:56:00:ee:%02d'%i) for i in range(1,16) ]
-
-    def max_plcs (self):
-       # leave space for the triangle setups
-        return len(self.plcs_ip_pool())-3
-
-    def preferred_hostname (self):
-        return None
-
-local_resources = OnelabTestResources ()
-
-if __name__ == '__main__':
-   for (h,_,__) in local_resources.qemus_ip_pool(): print h
diff --git a/system/LocalTestResources.sample.princeton b/system/LocalTestResources.sample.princeton
deleted file mode 100755 (executable)
index aa623a1..0000000
+++ /dev/null
@@ -1,58 +0,0 @@
-#!/usr/bin/python
-# Thierry Parmentelat <thierry.parmentelat@inria.fr>
-# Copyright (C) 2010 INRIA 
-#
-# this is the file that gets installed at onelab
-# in our testmaster box as /root/LocalTestResources.py
-# 
-
-if __name__ == '__main__':
-   import sys, os.path
-   sys.path.append(os.path.expanduser("~/git-tests/system"))
-
-from TestResources import TestResources
-
-class PlanetlabTestResources (TestResources):
-    
-    # we use only one for now but who knows
-    def plc_boxes (self):
-        return [ 'testbox.test.planet-lab.org' ]
-
-    def network_dict (self):
-        return { 'interface_fields:gateway':   '128.112.139.1',
-                 'interface_fields:network':   '128.112.139.0',
-                 'interface_fields:broadcast': '128.112.139.127',
-                 'interface_fields:netmask':   '255.255.255.128',
-                 'interface_fields:dns1':      '128.112.136.10',
-                 'interface_fields:dns2':      '128.112.136.12',
-                 }
-
-    def nodes_ip_pool (self):
-        return [ ("node-01.test.planet-lab.org", "128.112.139.44", "de:ad:be:ef:00:10"),
-                 ("node-02.test.planet-lab.org", "128.112.139.66", "de:ad:be:ef:00:20"),
-                 ]
-    
-    def qemus_ip_pool (self):
-        return [  ( 'testqemu1.test.planet-lab.org', None, None ) ]
-
-    def max_qemus (self):
-        return 1
-
-    def plcs_ip_pool (self):
-        return [ ("pl-service-08.CS.Princeton.EDU","128.112.139.34", "de:ad:be:ef:ff:01"),
-                 ("pl-service-09.CS.Princeton.EDU","128.112.139.35", "de:ad:be:ef:ff:02"),
-                 ("pl-service-10.CS.Princeton.EDU","128.112.139.36", "de:ad:be:ef:ff:03"),
-                 ("pl-service-11.CS.Princeton.EDU","128.112.139.37", "de:ad:be:ef:ff:04"),
-                 ("pl-service-12.CS.Princeton.EDU","128.112.139.41", "de:ad:be:ef:ff:05"),
-                 ]
-
-    def max_plcs (self):
-        return 4
-
-    def preferred_hostname (self):
-        return "plc"
-
-local_resources = PlanetlabTestResources ()
-
-if __name__ == '__main__':
-   for (h,_,__) in local_resources.qemus_ip_pool(): print h
diff --git a/system/Substrate.py b/system/Substrate.py
new file mode 100644 (file)
index 0000000..e660b47
--- /dev/null
@@ -0,0 +1,742 @@
+#
+# Thierry Parmentelat <thierry.parmentelat@inria.fr>
+# Copyright (C) 2010 INRIA 
+#
+# #################### history
+#
+# This is a complete rewrite of TestResources/Tracker/Pool
+# we don't use trackers anymore and just probe/sense the running 
+# boxes to figure out where we are
+# in order to implement some fairness in the round-robin allocation scheme
+# we need an indication of the 'age' of each running entity, 
+# hence the 'timestamp-*' steps in TestPlc
+# 
+# this should be much more flexible:
+# * supports several plc boxes 
+# * supports several qemu guests per host
+# * no need to worry about tracker being in sync or not
+#
+# #################### howto use
+#
+# each site is to write its own LocalSubstrate.py, 
+# (see e.g. LocalSubstrate.inria.py)
+# LocalSubstrate.py is expected to be in /root on the testmaster box
+# and needs to define
+# MYPLCs
+# . the vserver-capable boxes used for hosting myplcs
+# .  and their admissible load (max # of myplcs)
+# . the pool of DNS-names and IP-addresses available for myplcs
+# QEMU nodes
+# . the kvm-qemu capable boxes to host qemu instances
+# .  and their admissible load (max # of myplcs)
+# . the pool of DNS-names and IP-addresses available for nodes
+# 
+# ####################
+
+import os.path, sys
+import time
+import re
+import traceback
+import subprocess
+import commands
+import socket
+from optparse import OptionParser
+
+import utils
+from TestSsh import TestSsh
+from TestMapper import TestMapper
+
+def header (message,banner=True):
+    if not message: return
+    if banner: print "===============",
+    print message
+    sys.stdout.flush()
+
+def timestamp_sort(o1,o2): 
+    if not o1.timestamp:        return -1
+    elif not o2.timestamp:      return 1
+    else:                       return o2.timestamp-o1.timestamp
+
+####################
+# pool class
+# allows to pick an available IP among a pool
+# input is expressed as a list of tuples (hostname,ip,user_data)
+# that can be searched iteratively for a free slot
+# e.g.
+# pool = [ (hostname1,user_data1),  
+#          (hostname2,user_data2),  
+#          (hostname3,user_data2),  
+#          (hostname4,user_data4) ]
+# assuming that ip1 and ip3 are taken (pingable), then we'd get
+# pool=Pool(pool)
+# pool.next_free() -> entry2
+# pool.next_free() -> entry4
+# pool.next_free() -> None
+# that is, even if ip2 is not busy/pingable when the second next_free() is issued
+
+class PoolItem:
+    def __init__ (self,hostname,userdata):
+        self.hostname=hostname
+        self.userdata=userdata
+        # slot holds 'busy' or 'free' or 'fake' or None
+        self.status=None
+        self.ip=None
+
+    def line(self):
+        return "Pooled %s (%s) -> %s"%(self.hostname,self.userdata, self.status)
+    def get_ip(self):
+        if self.ip: return self.ip
+        ip=socket.gethostbyname(self.hostname)
+        self.ip=ip
+        return ip
+
+class Pool:
+
+    def __init__ (self, tuples,message):
+        self.pool= [ PoolItem (h,u) for (h,u) in tuples ] 
+        self.message=message
+        self._sensed=False
+
+    def sense (self):
+        if self._sensed: return
+        print 'Checking IP pool',self.message,
+        for item in self.pool:
+            if self.check_ping (item.hostname): item.status='busy'
+            else:                               item.status='free'
+        self._sensed=True
+        print 'Done'
+
+    def list (self):
+        for i in self.pool: print i.line()
+
+    def retrieve_userdata (self, hostname):
+        for i in self.pool: 
+            if i.hostname==hostname: return i.userdata
+        return None
+
+    def get_ip (self, hostname):
+        # use cached if in pool
+        for i in self.pool: 
+            if i.hostname==hostname: return i.get_ip()
+        # otherwise just ask dns again
+        return socket.gethostbyname(hostname)
+
+    def next_free (self):
+        for i in self.pool:
+            if i.status in ['busy','fake']: continue
+            i.status='fake'
+            return (i.hostname,i.userdata)
+        raise Exception,"No IP address available in pool %s"%self.message
+
+# OS-dependent ping option (support for macos, for convenience)
+    ping_timeout_option = None
+# checks whether a given hostname/ip responds to ping
+    def check_ping (self,hostname):
+        if not Pool.ping_timeout_option:
+            (status,osname) = commands.getstatusoutput("uname -s")
+            if status != 0:
+                raise Exception, "TestPool: Cannot figure your OS name"
+            if osname == "Linux":
+                Pool.ping_timeout_option="-w"
+            elif osname == "Darwin":
+                Pool.ping_timeout_option="-t"
+
+        command="ping -c 1 %s 1 %s"%(Pool.ping_timeout_option,hostname)
+        (status,output) = commands.getstatusoutput(command)
+        if status==0:   print '+',
+        else:           print '-',
+        return status == 0
+
+####################
+class Box:
+    def __init__ (self,hostname):
+        self.hostname=hostname
+    def simple_hostname (self):
+        return self.hostname.split('.')[0]
+    def test_ssh (self): return TestSsh(self.hostname,username='root',unknown_host=False)
+    def reboot (self):
+        self.test_ssh().run("shutdown -r now",message="Rebooting %s"%self.hostname)
+
+    def run(self,argv,message=None,trash_err=False,dry_run=False):
+        if dry_run:
+            print 'DRY_RUN:',
+            print " ".join(argv)
+            return 0
+        else:
+            header(message)
+            if not trash_err:
+                return subprocess.call(argv)
+            else:
+                return subprocess.call(argv,stderr=file('/dev/null','w'))
+                
+    def run_ssh (self, argv, message, trash_err=False):
+        ssh_argv = self.test_ssh().actual_argv(argv)
+        result=self.run (ssh_argv, message, trash_err)
+        if result!=0:
+            print "WARNING: failed to run %s on %s"%(" ".join(argv),self.hostname)
+        return result
+
+    def backquote (self, argv, trash_err=False):
+        if not trash_err:
+            return subprocess.Popen(argv,stdout=subprocess.PIPE).communicate()[0]
+        else:
+            return subprocess.Popen(argv,stdout=subprocess.PIPE,stderr=file('/dev/null','w')).communicate()[0]
+
+    def backquote_ssh (self, argv, trash_err=False):
+        # first probe the ssh link
+        probe_argv=self.test_ssh().actual_argv(['hostname'])
+        hostname=self.backquote ( probe_argv, trash_err=True )
+        if not hostname:
+            print "root@%s unreachable"%self.hostname
+            return ''
+        else:
+            return self.backquote( self.test_ssh().actual_argv(argv), trash_err)
+
+############################################################
+class BuildInstance:
+    def __init__ (self, buildname, pid, buildbox):
+        self.buildname=buildname
+        self.buildbox=buildbox
+        self.pids=[pid]
+
+    def add_pid(self,pid):
+        self.pids.append(pid)
+
+    def line (self):
+        return "== %s == (pids=%r)"%(self.buildname,self.pids)
+
+class BuildBox (Box):
+    def __init__ (self,hostname):
+        Box.__init__(self,hostname)
+        self.build_instances=[]
+
+    def add_build (self,buildname,pid):
+        for build in self.build_instances:
+            if build.buildname==buildname: 
+                build.add_pid(pid)
+                return
+        self.build_instances.append(BuildInstance(buildname, pid, self))
+
+    def list(self):
+        if not self.build_instances: 
+            header ('No build process on %s (%s)'%(self.hostname,self.uptime()))
+        else:
+            header ("Builds on %s (%s)"%(self.hostname,self.uptime()))
+            for b in self.build_instances: 
+                header (b.line(),banner=False)
+
+    def uptime(self):
+        if hasattr(self,'_uptime') and self._uptime: return self._uptime
+        return '*undef* uptime'
+
+    # inspect box and find currently running builds
+    matcher=re.compile("\s*(?P<pid>[0-9]+).*-[bo]\s+(?P<buildname>[^\s]+)(\s|\Z)")
+    def sense(self,reboot=False,verbose=True):
+        if reboot:
+            self.reboot(box)
+            return
+        print 'b',
+        command=['uptime']
+        self._uptime=self.backquote_ssh(command,trash_err=True).strip()
+        if not self._uptime: self._uptime='unreachable'
+        pids=self.backquote_ssh(['pgrep','build'],trash_err=True)
+        if not pids: return
+        command=['ps','-o','pid,command'] + [ pid for pid in pids.split("\n") if pid]
+        ps_lines=self.backquote_ssh (command).split('\n')
+        for line in ps_lines:
+            if not line.strip() or line.find('PID')>=0: continue
+            m=BuildBox.matcher.match(line)
+            if m: self.add_build (m.group('buildname'),m.group('pid'))
+            else: header('command %r returned line that failed to match'%command)
+
+############################################################
+class PlcInstance:
+    def __init__ (self, vservername, ctxid, plcbox):
+        self.vservername=vservername
+        self.ctxid=ctxid
+        self.plc_box=plcbox
+        # unknown yet
+        self.timestamp=None
+
+    def set_timestamp (self,timestamp): self.timestamp=timestamp
+    def set_now (self): self.timestamp=int(time.time())
+    def pretty_timestamp (self): return time.strftime("%Y-%m-%d:%H-%M",time.localtime(self.timestamp))
+
+    def line (self):
+        msg="== %s == (ctx=%s)"%(self.vservername,self.ctxid)
+        if self.timestamp: msg += " @ %s"%self.pretty_timestamp()
+        else:              msg += " *unknown timestamp*"
+        if self.ctxid==0: msg+=" not (yet?) running"
+        return msg
+
+    def kill (self):
+        msg="vserver stopping %s on %s"%(self.vservername,self.plc_box.hostname)
+        self.plc_box.run_ssh(['vserver',self.vservername,'stop'],msg)
+        self.plc_box.forget(self)
+
+class PlcBox (Box):
+    def __init__ (self, hostname, max_plcs):
+        Box.__init__(self,hostname)
+        self.plc_instances=[]
+        self.max_plcs=max_plcs
+
+    def add_vserver (self,vservername,ctxid):
+        for plc in self.plc_instances:
+            if plc.vservername==vservername: 
+                header("WARNING, duplicate myplc %s running on %s"%\
+                           (vservername,self.hostname),banner=False)
+                return
+        self.plc_instances.append(PlcInstance(vservername,ctxid,self))
+    
+    def forget (self, plc_instance):
+        self.plc_instances.remove(plc_instance)
+
+    # fill one slot even though this one is not started yet
+    def add_fake (self, plcname):
+        fake=PlcInstance('fake_'+plcname,0,self)
+        fake.set_now()
+        self.plc_instances.append(fake)
+
+    def line(self): 
+        msg="%s [max=%d,%d free] (%s)"%(self.hostname, self.max_plcs,self.free_spots(),self.uname())
+        return msg
+        
+    def list(self):
+        if not self.plc_instances: 
+            header ('No vserver running on %s'%(self.line()))
+        else:
+            header ("Active plc VMs on %s"%self.line())
+            for p in self.plc_instances: 
+                header (p.line(),banner=False)
+
+    def free_spots (self):
+        return self.max_plcs - len(self.plc_instances)
+
+    def uname(self):
+        if hasattr(self,'_uname') and self._uname: return self._uname
+        return '*undef* uname'
+
+    def plc_instance_by_vservername (self, vservername):
+        for p in self.plc_instances:
+            if p.vservername==vservername: return p
+        return None
+
+    def sense (self, reboot=False, soft=False):
+        if reboot:
+            # remove mark for all running servers to avoid resurrection
+            stop_command=['rm','-rf','/etc/vservers/*/apps/init/mark']
+            self.run_ssh(stop_command,"Removing all vserver marks on %s"%self.hostname)
+            if not soft:
+                self.reboot()
+                return
+            else:
+                self.run_ssh(['service','util-vserver','stop'],"Stopping all running vservers")
+            return
+        print 'p',
+        self._uname=self.backquote_ssh(['uname','-r']).strip()
+        # try to find fullname (vserver_stat truncates to a ridiculously short name)
+        # fetch the contexts for all vservers on that box
+        map_command=['grep','.','/etc/vservers/*/context','/dev/null',]
+        context_map=self.backquote_ssh (map_command)
+        # at this point we have a set of lines like
+        # /etc/vservers/2010.01.20--k27-f12-32-vplc03/context:40144
+        ctx_dict={}
+        for map_line in context_map.split("\n"):
+            if not map_line: continue
+            [path,xid] = map_line.split(':')
+            ctx_dict[xid]=os.path.basename(os.path.dirname(path))
+        # at this point ctx_id maps context id to vservername
+
+        command=['vserver-stat']
+        vserver_stat = self.backquote_ssh (command)
+        for vserver_line in vserver_stat.split("\n"):
+            if not vserver_line: continue
+            context=vserver_line.split()[0]
+            if context=="CTX": continue
+            longname=ctx_dict[context]
+            self.add_vserver(longname,context)
+#            print self.margin_outline(self.vplcname(longname)),"%(vserver_line)s [=%(longname)s]"%locals()
+
+        # scan timestamps
+        command=   ['grep','.']
+        command += ['/vservers/%s/timestamp'%b for b in ctx_dict.values()]
+        command += ['/dev/null']
+        ts_lines=self.backquote_ssh(command,trash_err=True).split('\n')
+        for ts_line in ts_lines:
+            if not ts_line.strip(): continue
+            # expect /vservers/<vservername>/timestamp:<timestamp>
+            try:
+                (_,__,vservername,tail)=ts_line.split('/')
+                (_,timestamp)=tail.split(':')
+                timestamp=int(timestamp)
+                q=self.plc_instance_by_vservername(vservername)
+                if not q: 
+                    print 'WARNING unattached plc instance',ts_line
+                    continue
+                q.set_timestamp(timestamp)
+            except:  print 'WARNING, could not parse ts line',ts_line
+        
+
+
+
+############################################################
+class QemuInstance: 
+    def __init__ (self, nodename, pid, qemubox):
+        self.nodename=nodename
+        self.pid=pid
+        self.qemu_box=qemubox
+        # not known yet
+        self.buildname=None
+        self.timestamp=None
+        
+    def set_buildname (self,buildname): self.buildname=buildname
+    def set_timestamp (self,timestamp): self.timestamp=timestamp
+    def set_now (self): self.timestamp=int(time.time())
+    def pretty_timestamp (self): return time.strftime("%Y-%m-%d:%H-%M",time.localtime(self.timestamp))
+    
+    def line (self):
+        msg = "== %s == (pid=%s)"%(self.nodename,self.pid)
+        if self.buildname: msg += " <--> %s"%self.buildname
+        else:              msg += " *unknown build*"
+        if self.timestamp: msg += " @ %s"%self.pretty_timestamp()
+        else:              msg += " *unknown timestamp*"
+        if self.pid:       msg += "pid=%s"%self.pid
+        else:              msg += " not (yet?) running"
+        return msg
+    
+    def kill(self):
+        if self.pid==0: print "cannot kill qemu %s with pid==0"%self.nodename
+        msg="Killing qemu %s with pid=%s on box %s"%(self.nodename,self.pid,self.qemu_box.hostname)
+        self.qemu_box.run_ssh(['kill',"%s"%self.pid],msg)
+        self.qemu_box.forget(self)
+
+
+class QemuBox (Box):
+    def __init__ (self, hostname, max_qemus):
+        Box.__init__(self,hostname)
+        self.qemu_instances=[]
+        self.max_qemus=max_qemus
+
+    def add_node (self,nodename,pid):
+        for qemu in self.qemu_instances:
+            if qemu.nodename==nodename: 
+                header("WARNING, duplicate qemu %s running on %s"%\
+                           (nodename,self.hostname), banner=False)
+                return
+        self.qemu_instances.append(QemuInstance(nodename,pid,self))
+
+    def forget (self, qemu_instance):
+        self.qemu_instances.remove(qemu_instance)
+
+    # fill one slot even though this one is not started yet
+    def add_fake (self, nodename):
+        fake=QemuInstance('fake_'+nodename,0,self)
+        fake.set_now()
+        self.qemu_instances.append(fake)
+
+    def line (self):
+        msg="%s [max=%d,%d free] (%s)"%(self.hostname, self.max_qemus,self.free_spots(),self.driver())
+        return msg
+
+    def list(self):
+        if not self.qemu_instances: 
+            header ('No qemu process on %s'%(self.line()))
+        else:
+            header ("Active qemu processes on %s"%(self.line()))
+            for q in self.qemu_instances: 
+                header (q.line(),banner=False)
+
+    def free_spots (self):
+        return self.max_qemus - len(self.qemu_instances)
+
+    def driver(self):
+        if hasattr(self,'_driver') and self._driver: return self._driver
+        return '*undef* driver'
+
+    def qemu_instance_by_pid (self,pid):
+        for q in self.qemu_instances:
+            if q.pid==pid: return q
+        return None
+
+    def qemu_instance_by_nodename_buildname (self,nodename,buildname):
+        for q in self.qemu_instances:
+            if q.nodename==nodename and q.buildname==buildname:
+                return q
+        return None
+
+    matcher=re.compile("\s*(?P<pid>[0-9]+).*-cdrom\s+(?P<nodename>[^\s]+)\.iso")
+    def sense(self, reboot=False, soft=False):
+        if reboot:
+            if not soft:
+                self.reboot()
+            else:
+                self.run_ssh(box,['pkill','qemu'],"Killing qemu instances")
+            return
+        print 'q',
+        modules=self.backquote_ssh(['lsmod']).split('\n')
+        self._driver='*NO kqemu/kmv_intel MODULE LOADED*'
+        for module in modules:
+            if module.find('kqemu')==0:
+                self._driver='kqemu module loaded'
+            # kvm might be loaded without vkm_intel (we dont have AMD)
+            elif module.find('kvm_intel')==0:
+                self._driver='kvm_intel module loaded'
+        ########## find out running pids
+        pids=self.backquote_ssh(['pgrep','qemu'])
+        if not pids: return
+        command=['ps','-o','pid,command'] + [ pid for pid in pids.split("\n") if pid]
+        ps_lines = self.backquote_ssh (command).split("\n")
+        for line in ps_lines:
+            if not line.strip() or line.find('PID') >=0 : continue
+            m=QemuBox.matcher.match(line)
+            if m: self.add_node (m.group('nodename'),m.group('pid'))
+            else: header('command %r returned line that failed to match'%command)
+        ########## retrieve alive instances and map to build
+        live_builds=[]
+        command=['grep','.','*/*/qemu.pid','/dev/null']
+        pid_lines=self.backquote_ssh(command,trash_err=True).split('\n')
+        for pid_line in pid_lines:
+            if not pid_line.strip(): continue
+            # expect <build>/<nodename>/qemu.pid:<pid>pid
+            try:
+                (buildname,nodename,tail)=pid_line.split('/')
+                (_,pid)=tail.split(':')
+                q=self.qemu_instance_by_pid (pid)
+                if not q: continue
+                q.set_buildname(buildname)
+                live_builds.append(buildname)
+            except: print 'WARNING, could not parse pid line',pid_line
+        # retrieve timestamps
+        command=   ['grep','.']
+        command += ['%s/*/timestamp'%b for b in live_builds]
+        command += ['/dev/null']
+        ts_lines=self.backquote_ssh(command,trash_err=True).split('\n')
+        for ts_line in ts_lines:
+            if not ts_line.strip(): continue
+            # expect <build>/<nodename>/timestamp:<timestamp>
+            try:
+                (buildname,nodename,tail)=ts_line.split('/')
+                nodename=nodename.replace('qemu-','')
+                (_,timestamp)=tail.split(':')
+                timestamp=int(timestamp)
+                q=self.qemu_instance_by_nodename_buildname(nodename,buildname)
+                if not q: 
+                    print 'WARNING unattached qemu instance',ts_line,nodename,buildname
+                    continue
+                q.set_timestamp(timestamp)
+            except:  print 'WARNING, could not parse ts line',ts_line
+
+############################################################
+class Options: pass
+
+class Substrate:
+
+    def test (self): 
+        self.sense()
+
+    def __init__ (self):
+        self.options=Options()
+        self.options.dry_run=False
+        self.options.verbose=False
+        self.options.probe=True
+        self.options.soft=True
+        self.build_boxes = [ BuildBox(h) for h in self.build_boxes_spec() ]
+        self.plc_boxes = [ PlcBox (h,m) for (h,m) in self.plc_boxes_spec ()]
+        self.qemu_boxes = [ QemuBox (h,m) for (h,m) in self.qemu_boxes_spec ()]
+        self.all_boxes = self.build_boxes + self.plc_boxes + self.qemu_boxes
+        self._sensed=False
+
+        self.vplc_pool = Pool (self.vplc_ips(),"for vplcs")
+        self.vnode_pool = Pool (self.vnode_ips(),"for vnodes")
+
+        self.vnode_pool.list()
+
+
+#    def build_box_names (self):
+#        return [ h for h in self.build_boxes_spec() ]
+#    def plc_boxes (self):
+#        return [ h for (h,m) in self.plc_boxes_spec() ]
+#    def qemu_boxes (self):
+#        return [ h for (h,m) in self.qemu_boxes_spec() ]
+
+    def sense (self,force=False):
+        if self._sensed and not force: return
+        print 'Sensing local substrate...',
+        for b in self.all_boxes: b.sense()
+        print 'Done'
+        self._sensed=True
+
+    ########## 
+    def provision (self,plcs,options):
+        try:
+            self.sense()
+            self.list_all()
+            # attach each plc to a plc box and an IP address
+            plcs = [ self.provision_plc (plc,options) for plc in plcs ]
+            # attach each node/qemu to a qemu box with an IP address
+            plcs = [ self.provision_qemus (plc,options) for plc in plcs ]
+            # update the SFA spec accordingly
+            plcs = [ self.localize_sfa_rspec(plc,options) for plc in plcs ]
+            return plcs
+        except Exception, e:
+            print '* Could not provision this test on current substrate','--',e,'--','exiting'
+            traceback.print_exc()
+            sys.exit(1)
+
+    # find an available plc box (or make space)
+    # and a free IP address (using options if present)
+    def provision_plc (self, plc, options):
+        #### we need to find one plc box that still has a slot
+        plc_box=None
+        max_free=0
+        # use the box that has max free spots for load balancing
+        for pb in self.plc_boxes:
+            free=pb.free_spots()
+            if free>max_free:
+                plc_box=pb
+                max_free=free
+        # everything is already used
+        if not plc_box:
+            # find the oldest of all our instances
+            all_plc_instances=reduce(lambda x, y: x+y, 
+                                     [ pb.plc_instances for pb in self.plc_boxes ],
+                                     [])
+            all_plc_instances.sort(timestamp_sort)
+            plc_instance_to_kill=all_plc_instances[0]
+            plc_box=plc_instance_to_kill.plc_box
+            plc_instance_to_kill.kill()
+            print 'killed oldest = %s on %s'%(plc_instance_to_kill.line(),
+                                             plc_instance_to_kill.plc_box.hostname)
+
+        utils.header( 'plc %s -> box %s'%(plc['name'],plc_box.line()))
+        plc_box.add_fake(plc['name'])
+        #### OK we have a box to run in, let's find an IP address
+        # look in options
+        if options.ips_vplc:
+            vplc_hostname=options.ips_vplc.pop()
+        else:
+            self.vplc_pool.sense()
+            (vplc_hostname,unused)=self.vplc_pool.next_free()
+        vplc_ip = self.vplc_pool.get_ip(vplc_hostname)
+
+        #### compute a helpful vserver name
+        # remove domain in hostname
+        vplc_simple = vplc_hostname.split('.')[0]
+        vservername = "%s-%d-%s" % (options.buildname,plc['index'],vplc_simple)
+        plc_name = "%s_%s"%(plc['name'],vplc_simple)
+
+        #### apply in the plc_spec
+        # # informative
+        # label=options.personality.replace("linux","")
+        mapper = {'plc': [ ('*' , {'hostname':plc_box.hostname,
+                                   # 'name':'%s-'+label,
+                                   'name': plc_name,
+                                   'vservername':vservername,
+                                   'vserverip':vplc_ip,
+                                   'PLC_DB_HOST':vplc_hostname,
+                                   'PLC_API_HOST':vplc_hostname,
+                                   'PLC_BOOT_HOST':vplc_hostname,
+                                   'PLC_WWW_HOST':vplc_hostname,
+                                   'PLC_NET_DNS1' : self.network_settings() [ 'interface_fields:dns1' ],
+                                   'PLC_NET_DNS2' : self.network_settings() [ 'interface_fields:dns2' ],
+                                   } ) ]
+                  }
+
+        utils.header("Attaching %s on IP %s in vserver %s"%(plc['name'],vplc_hostname,vservername))
+        # mappers only work on a list of plcs
+        return TestMapper([plc],options).map(mapper)[0]
+
+    ##########
+    def provision_qemus (self, plc, options):
+        test_mapper = TestMapper ([plc], options)
+        nodenames = test_mapper.node_names()
+        maps=[]
+        for nodename in nodenames:
+            #### similarly we want to find a qemu box that can host us
+            qemu_box=None
+            max_free=0
+            # use the box that has max free spots for load balancing
+            for qb in self.qemu_boxes:
+                free=qb.free_spots()
+            if free>max_free:
+                qemu_box=qb
+                max_free=free
+            # everything is already used
+            if not qemu_box:
+                # find the oldest of all our instances
+                all_qemu_instances=reduce(lambda x, y: x+y, 
+                                         [ qb.qemu_instances for qb in self.qemu_boxes ],
+                                         [])
+                all_qemu_instances.sort(timestamp_sort)
+                qemu_instance_to_kill=all_qemu_instances[0]
+                qemu_box=qemu_instance_to_kill.qemu_box
+                qemu_instance_to_kill.kill()
+                print 'killed oldest = %s on %s'%(qemu_instance_to_kill.line(),
+                                                 qemu_instance_to_kill.qemu_box.hostname)
+
+            utils.header( 'node %s -> qemu box %s'%(nodename,qemu_box.line()))
+            qemu_box.add_fake(nodename)
+            #### OK we have a box to run in, let's find an IP address
+            # look in options
+            if options.ips_vnode:
+                qemu_hostname=options.ips_vnode.pop()
+                mac=self.vnode_pool.retrieve_userdata(qemu_hostname)
+                print 'case 1 hostname',qemu_hostname,'mac',mac
+            else:
+                self.vnode_pool.sense()
+                (qemu_hostname,mac)=self.vnode_pool.next_free()
+                print 'case 2 hostname',qemu_hostname,'mac',mac
+            ip=self.vnode_pool.get_ip (qemu_hostname)
+            utils.header("Attaching %s on IP %s MAC %s"%(plc['name'],qemu_hostname,mac))
+
+            if qemu_hostname.find('.')<0:
+                qemu_hostname += "."+self.domain()
+            nodemap={'host_box':qemu_box.hostname,
+                     'node_fields:hostname':qemu_hostname,
+                     'interface_fields:ip':ip, 
+                     'interface_fields:mac':mac,
+                     }
+            nodemap.update(self.network_settings())
+            maps.append ( (nodename, nodemap) )
+
+        return test_mapper.map({'node':maps})[0]
+
+    def localize_sfa_rspec (self,plc,options):
+       
+        plc['sfa']['SFA_REGISTRY_HOST'] = plc['PLC_DB_HOST']
+        plc['sfa']['SFA_AGGREGATE_HOST'] = plc['PLC_DB_HOST']
+        plc['sfa']['SFA_SM_HOST'] = plc['PLC_DB_HOST']
+        plc['sfa']['SFA_PLC_DB_HOST'] = plc['PLC_DB_HOST']
+        plc['sfa']['SFA_PLC_URL'] = 'https://' + plc['PLC_API_HOST'] + ':443/PLCAPI/' 
+        for site in plc['sites']:
+            for node in site['nodes']:
+                plc['sfa']['sfa_slice_rspec']['part4'] = node['node_fields']['hostname']
+       return plc
+
+    #################### show results for interactive mode
+    def list_all (self):
+        self.sense()
+        for b in self.all_boxes: b.list()
+
+    def get_box (self,box):
+        for b in self.build_boxes + self.plc_boxes + self.qemu_boxes:
+            if b.simple_hostname()==box:
+                return b
+        print "Could not find box %s"%box
+        return None
+
+    def list_box(self,box):
+        b=self.get_box(box)
+        if not b: return
+        b.sense()
+        b.list()
+
+    # can be run as a utility to manage the local infrastructure
+    def main (self):
+        parser=OptionParser()
+        (options,args)=parser.parse_args()
+        if not args:
+            self.list_all()
+        else:
+            for box in args:
+                self.list_box(box)
index 2645fe1..6b18b26 100755 (executable)
@@ -14,9 +14,9 @@ from TestPlc import TestPlc
 from TestSite import TestSite
 from TestNode import TestNode
 
-# add $HOME in PYTHONPATH so we can import LocalTestResources.py
+# add $HOME in PYTHONPATH so we can import LocalSubstrate.py
 sys.path.append(os.environ['HOME'])
-import LocalTestResources
+import LocalSubstrate
 
 class TestMain:
 
@@ -75,7 +75,7 @@ arch-rpms-url defaults to the last value used, as stored in arg-arch-rpms-url,
    no default
 config defaults to the last value used, as stored in arg-config,
    or %r
-ips_node, ips_plc and ips_qemu defaults to the last value used, as stored in arg-ips-{node,plc,qemu},
+ips_vnode, ips_vplc and ips_qemu defaults to the last value used, as stored in arg-ips-{node,vplc,qemu},
    default is to use IP scanning
 steps refer to a method in TestPlc or to a step_* module
 ===
@@ -100,9 +100,9 @@ steps refer to a method in TestPlc or to a step_* module
                           help="Run all default steps")
         parser.add_option("-l","--list",action="store_true",dest="list_steps", default=False,
                           help="List known steps")
-        parser.add_option("-N","--nodes",action="append", dest="ips_node", default=[],
+        parser.add_option("-N","--nodes",action="append", dest="ips_vnode", default=[],
                           help="Specify the set of hostname/IP's to use for nodes")
-        parser.add_option("-P","--plcs",action="append", dest="ips_plc", default=[],
+        parser.add_option("-P","--plcs",action="append", dest="ips_vplc", default=[],
                           help="Specify the set of hostname/IP's to use for plcs")
         parser.add_option("-Q","--qemus",action="append", dest="ips_qemu", default=[],
                           help="Specify the set of hostname/IP's to use for qemu boxes")
@@ -137,20 +137,20 @@ steps refer to a method in TestPlc or to a step_* module
                     result.append(el)
             return result
         # flatten relevant options
-        for optname in ['config','exclude','ips_node','ips_plc','ips_qemu']:
+        for optname in ['config','exclude','ips_vnode','ips_vplc','ips_qemu']:
             setattr(self.options,optname, flatten ( [ arg.split() for arg in getattr(self.options,optname) ] ))
 
         # handle defaults and option persistence
         for (recname,filename,default,need_reverse) in (
             ('build_url','arg-build-url',TestMain.default_build_url,None) ,
-            ('ips_node','arg-ips-node',[],True) , 
-            ('ips_plc','arg-ips-plc',[],True) , 
+            ('ips_vnode','arg-ips-vnode',[],True) , 
+            ('ips_vplc','arg-ips-vplc',[],True) , 
             ('ips_qemu','arg-ips-qemu',[],True) , 
             ('config','arg-config',TestMain.default_config,False) , 
             ('arch_rpms_url','arg-arch-rpms-url',"",None) , 
-            ('personality','arg-personality',"linux32",None),
-            ('pldistro','arg-pldistro',"planetlab",None),
-            ('fcdistro','arg-fcdistro','centos5',None),
+            ('personality','arg-personality',"linux64",None),
+            ('pldistro','arg-pldistro',"onelab",None),
+            ('fcdistro','arg-fcdistro','f14',None),
             ) :
 #            print 'handling',recname
             path=filename
@@ -245,20 +245,20 @@ steps refer to a method in TestPlc or to a step_* module
                 raise
 
         # run localize as defined by local_resources
-        all_plc_specs = LocalTestResources.local_resources.localize(all_plc_specs,self.options)
+        all_plc_specs = LocalSubstrate.local_substrate.provision(all_plc_specs,self.options)
 
         # remember plc IP address(es) if not specified
-        ips_plc_file=open('arg-ips-plc','w')
+        ips_vplc_file=open('arg-ips-vplc','w')
         for plc_spec in all_plc_specs:
-            ips_plc_file.write("%s\n"%plc_spec['PLC_API_HOST'])
-        ips_plc_file.close()
+            ips_vplc_file.write("%s\n"%plc_spec['PLC_API_HOST'])
+        ips_vplc_file.close()
         # ditto for nodes
-        ips_node_file=open('arg-ips-node','w')
+        ips_vnode_file=open('arg-ips-vnode','w')
         for plc_spec in all_plc_specs:
             for site_spec in plc_spec['sites']:
                 for node_spec in site_spec['nodes']:
-                    ips_node_file.write("%s\n"%node_spec['node_fields']['hostname'])
-        ips_node_file.close()
+                    ips_vnode_file.write("%s\n"%node_spec['node_fields']['hostname'])
+        ips_vnode_file.close()
         # ditto for qemu boxes
         ips_qemu_file=open('arg-ips-qemu','w')
         for plc_spec in all_plc_specs:
index c0358a5..7d0547f 100644 (file)
@@ -6,8 +6,7 @@
 # mapper class
 # 
 # this works on a spec as defined in a config file
-# and allows to remap various fields, typically to another testbox 
-# see an example in config_onelab_testbox32.py
+# and allows to remap various fields on the local substrate
 # 
 
 import utils
@@ -58,6 +57,9 @@ class TestMapper:
                             utils.header ("WARNING : inserting key %s for path %s on %s %s"%(
                                     step,path,type,name))
                     # apply formatting if '%s' found in the value
+                    if v is None:
+                        if self.options.verbose: print "TestMapper WARNING - None value - ignored, key=",k
+                        continue
                     if v.find('%s')>=0:
                         v=v%obj[k]
                     if self.options.verbose:
index 4b499e7..11e9391 100644 (file)
@@ -198,8 +198,13 @@ class TestNode:
             utils.header("TestNode.qemu_start : %s model %s taken as real node"%(self.name(),model))
         return True
 
+    def timestamp_qemu (self):
+        test_box = self.test_box()
+        test_box.run_in_buildname("mkdir -p %s"%self.nodedir())
+        now=int(time.time())
+        return test_box.run_in_buildname("echo %d > %s/timestamp"%(now,self.nodedir()))==0
+
     def start_qemu (self):
-        options = self.test_plc.options
         test_box = self.test_box()
         utils.header("Starting qemu node %s on %s"%(self.name(),test_box.hostname()))
 
index 1ccdc80..fbd3295 100644 (file)
@@ -86,7 +86,7 @@ SEPSFA='<sep_sfa>'
 class TestPlc:
 
     default_steps = [
-        'show', 'local_pre', SEP,
+        'show', 'timestamp_plc', 'timestamp_qemu', SEP,
         'vs_delete','vs_create','plc_install', 'plc_configure', 'plc_start', SEP,
         'keys_fetch', 'keys_store', 'keys_clear_known_hosts', SEP,
         'initscripts', 'sites', 'nodes', 'slices', 'nodegroups', 'leases', SEP,
@@ -441,6 +441,12 @@ class TestPlc:
         print '+\tqemu box %s'%node_spec['host_box']
         print '+\thostname=%s'%node_spec['node_fields']['hostname']
 
+    # write a timestamp in /vservers/<>/
+    def timestamp_plc (self):
+        now=int(time.time())
+        utils.system(self.test_ssh.actual_command("mkdir -p /vservers/%s"%self.vservername))
+        return utils.system(self.test_ssh.actual_command("echo %d > /vservers/%s/timestamp"%(now,self.vservername)))==0
+        
     def local_pre (self):
         "run site-dependant pre-test script as defined in LocalTestResources"
         from LocalTestResources import local_resources
@@ -1056,6 +1062,11 @@ class TestPlc:
         "all nodes: start the qemu instance (also runs qemu-bridge-init start)"
         pass
 
+    @node_mapper
+    def timestamp_qemu (self) : 
+        "all nodes: start the qemu instance (also runs qemu-bridge-init start)"
+        pass
+
     def check_tcp (self):
         "check TCP connectivity between 2 slices (or in loopback if only one is defined)"
         specs = self.plc_spec['tcp_test']
diff --git a/system/TestPool.py b/system/TestPool.py
deleted file mode 100644 (file)
index f1ffa6d..0000000
+++ /dev/null
@@ -1,125 +0,0 @@
-#
-# Thierry Parmentelat <thierry.parmentelat@inria.fr>
-# Copyright (C) 2010 INRIA 
-#
-#
-# pool class
-# 
-# allows to pick an available IP among a pool
-#
-# input is expressed as a list of tuples (hostname,ip,user_data)
-# that can be searched iteratively for a free slot
-# TestPoolIP : look for a free IP address
-# TestPoolQemu : look for a test_box with no qemu running
-# e.g.
-# pool = [ (hostname1,ip1,user_data1),  
-#          (hostname2,ip2,user_data2),  
-#          (hostname3,ip3,user_data2),  
-#          (hostname4,ip4,user_data4) ]
-# assuming that ip1 and ip3 are taken (pingable), then we'd get
-# pool=TestPoolIP(pool)
-# pool.next_free() -> entry2
-# pool.next_free() -> entry4
-# pool.next_free() -> None
-# that is, even if ip2 is not busy/pingable when the second next_free() is issued
-
-import commands
-import utils
-
-class TestPool:
-
-    def __init__ (self, pool, options,message):
-        self.pool=pool
-        self.options=options
-        self.busy=[]
-        self.message=message
-
-    # let's be flexible
-    def match (self,triple,hostname_or_ip):
-        (h,i,u)=triple
-        return h.find(hostname_or_ip)>=0  or (i and i.find(hostname_or_ip)>=0) or hostname_or_ip.find(h)==0
-
-    def locate_entry (self, hostname_or_ip):
-        for (h,i,u) in self.pool:
-            if self.match ( (h,i,u,), hostname_or_ip):
-                self.busy.append(h)
-                return (h,i,u)
-        utils.header('TestPool.locate_entry: Could not locate entry for %r in pool:'%hostname_or_ip)
-        return None
-
-    # the hostnames provided (from a tracker) are considered last
-    def next_free (self, tracker_hostnames):
-        utils.header('TestPool is looking for a %s'%self.message)
-        # create 2 lists of (h,i,u) entries, the ones not in the tracker, and the ones in the tracker
-        in_track_pool=[]
-        out_track_pool=[]
-        for (h,i,u) in self.pool:
-            in_tracker=False
-            for hostname in tracker_hostnames:
-                if self.match ( (h,i,u,) , hostname) : in_tracker = True
-            if in_tracker: in_track_pool.append  ( (h,i,u,) )
-            else:          out_track_pool.append ( (h,i,u,) )
-        # consider outsiders first
-        for (hostname,ip,user_data) in out_track_pool + in_track_pool:
-            utils.header ('* candidate %s' % hostname)
-        for (hostname,ip,user_data) in out_track_pool + in_track_pool:
-            if hostname in self.busy:
-                continue
-            utils.header('TestPool : checking %s'%hostname)
-            if self.free_hostname(hostname):
-                utils.header('%s is available'%hostname)
-                self.busy.append(hostname)
-                return (hostname,ip,user_data)
-            else:
-                self.busy.append(hostname)
-        raise Exception, "No space left in pool (%s)"%self.message
-
-class TestPoolIP (TestPool):
-
-    def __init__ (self,pool,options):
-        TestPool.__init__(self,pool,options,"free IP address")
-
-    def free_hostname (self, hostname):
-        return not self.check_ping(hostname)
-
-# OS-dependent ping option (support for macos, for convenience)
-    ping_timeout_option = None
-# checks whether a given hostname/ip responds to ping
-    def check_ping (self,hostname):
-        if not TestPoolIP.ping_timeout_option:
-            (status,osname) = commands.getstatusoutput("uname -s")
-            if status != 0:
-                raise Exception, "TestPool: Cannot figure your OS name"
-            if osname == "Linux":
-                TestPoolIP.ping_timeout_option="-w"
-            elif osname == "Darwin":
-                TestPoolIP.ping_timeout_option="-t"
-
-        if self.options.verbose:
-            utils.header ("TestPoolIP: pinging %s"%hostname)
-        command="ping -c 1 %s 1 %s"%(TestPoolIP.ping_timeout_option,hostname)
-        (status,output) = commands.getstatusoutput(command)
-        return status == 0
-
-class TestPoolQemu (TestPool):
-    
-    def __init__ (self,pool,options):
-        TestPool.__init__(self,pool,options,"free qemu box")
-
-    def free_hostname (self, hostname):
-        return not self.busy_qemu(hostname)
-
-    # is there a qemu runing on that box already ?
-    def busy_qemu (self, hostname):
-        if self.options.verbose:
-            utils.header("TestPoolQemu: checking for running qemu instances on %s"%hostname)
-        command="ssh -o ConnectTimeout=5 root@%s ps -e -o cmd"%hostname
-        (status,output) = commands.getstatusoutput(command)
-        # if we fail to run that, let's assume we don't have ssh access, so
-        # we pretend the box is busy
-        if status!=0:
-            return True
-        elif output.find("qemu") >=0 :
-            return True
-        else:
-            return False
diff --git a/system/TestResources.py b/system/TestResources.py
deleted file mode 100644 (file)
index c75bebb..0000000
+++ /dev/null
@@ -1,242 +0,0 @@
-# Thierry Parmentelat <thierry.parmentelat@inria.fr>
-# Copyright (C) 2010 INRIA 
-#
-import sys
-import traceback
-
-import utils
-from TestMapper import TestMapper
-from TestPool import TestPoolQemu, TestPoolIP
-from Trackers import TrackerPlc, TrackerQemu
-
-class TestResources:
-
-    # need more specialization, see an example in LocalTestResources.sample.inria
-
-    ########## 
-    def localize (self,plcs,options):
-        try:
-            plcs = self.localize_qemus(plcs,options)
-        except Exception, e:
-            print '* Could not localize qemus','--',e,'--','exiting'
-            traceback.print_exc()
-            sys.exit(1)
-        try:
-            plcs = self.localize_nodes(plcs,options)
-        except Exception,e:
-            print '* Could not localize nodes','--',e,'--','exiting'
-            sys.exit(1)
-        try:
-            plcs = self.localize_plcs(plcs,options)
-        except Exception,e:
-            print '* Could not localize plcs','--',e,'--','exiting'
-            sys.exit(1)
-        try:
-            plcs = self.localize_rspec(plcs,options)
-        except Exception,e:
-            print '* Could not localize RSpec','--',e,'--','exiting'
-            sys.exit(1)
-        return plcs
-
-    def localize_qemus (self,plcs,options):
-
-        # all plcs on the same vserver box
-        plc_box = self.plc_boxes()[0]
-
-        # informative
-        label=options.personality.replace("linux","")
-
-        node_map = []
-        qemu_pool = TestPoolQemu (self.qemus_ip_pool(), options)
-
-        for index in range(1,options.size+1):
-            if options.ips_qemu:
-                ip_or_hostname=options.ips_qemu.pop()
-                (hostname,ip,unused)=qemu_pool.locate_entry(ip_or_hostname)
-            else:
-                tracker=TrackerQemu(options,instances=self.max_qemus()-1)
-                (hostname,ip,unused) = qemu_pool.next_free(tracker.hostnames())
-
-            node_map += [ ('node%d'%index, {'host_box':hostname},) ]
-
-        mapper = {'plc': [ ('*' , {'hostname':plc_box,
-                                   'PLC_DB_HOST':plc_box,
-                                   'PLC_API_HOST':plc_box,
-                                   'PLC_BOOT_HOST':plc_box,
-                                   'PLC_WWW_HOST':plc_box,
-                                   'name':'%s-'+label } ) 
-                           ],
-                  'node': node_map,
-                  }
-    
-        return TestMapper(plcs,options).map(mapper)
-        
-
-    def localize_nodes (self, plcs, options):
-       
-        ip_pool = TestPoolIP (self.nodes_ip_pool(),options)
-        network_dict = self.network_dict()
-
-        test_mapper = TestMapper (plcs, options)
-    
-        all_nodenames = test_mapper.node_names()
-        maps = []
-        for nodename in all_nodenames:
-            if options.ips_node:
-                ip_or_hostname=options.ips_node.pop()
-                (hostname,ip,mac)=ip_pool.locate_entry(ip_or_hostname)
-            else:
-                tracker=TrackerQemu(options,instances=self.max_qemus()-1)
-                (hostname,ip,mac) = ip_pool.next_free(tracker.nodenames())
-            # myplc needs a fqdn or whines otherwise
-            if hostname.find('.')<0: hostname += "." + self.domain()
-            utils.header('Attaching node %s to %s (%s)'%(nodename,hostname,ip))
-            node_dict= {'node_fields:hostname':hostname,
-                        'interface_fields:ip':ip, 
-                        'interface_fields:mac':mac,
-                        }
-        
-            node_dict.update(network_dict)
-            maps.append ( ( nodename, node_dict) )
-    
-        plc_map = [ ( '*' , { 'PLC_NET_DNS1' : network_dict [ 'interface_fields:dns1' ],
-                              'PLC_NET_DNS2' : network_dict [ 'interface_fields:dns2' ], } ) ]
-    
-        return test_mapper.map ({'node': maps, 'plc' : plc_map } )
-        
-
-    def localize_plcs (self,plcs,options):
-        
-        ip_pool = TestPoolIP (self.plcs_ip_pool(),options)
-    
-        plc_counter=0
-        for plc in plcs:
-            if options.ips_plc :
-                ip_or_hostname=options.ips_plc.pop()
-                (hostname,ip,mac)=ip_pool.locate_entry(ip_or_hostname)
-                if options.verbose:
-                    utils.header("Using user-provided %s %s for plc %s"%(
-                            hostname,ip_or_hostname,plc['name']))
-            else:
-                tracker = TrackerPlc(options,instances=self.max_plcs())
-                (hostname,ip,mac)=ip_pool.next_free(tracker.plcnames())
-                if options.verbose:
-                    utils.header("Using auto-allocated %s %s for plc %s"%(
-                            hostname,ip,plc['name']))
-    
-            ### rewrite fields in plc
-            # compute a helpful vserver name - remove domain in hostname
-            simplehostname = hostname.split('.')[0]
-            preferred_hostname = self.preferred_hostname()
-            vservername = options.buildname
-            if len(plcs) == 1 :
-                vservername = "%s-%s" % (vservername,simplehostname)
-                #ugly hack for "vuname: vc_set_vhi_name(): Arg list too long" errors
-                if len(vservername) > 38 and preferred_hostname is not None:
-                    vservername = "%s-%s" % (options.buildname,preferred_hostname)
-            else:
-                plc_counter += 1
-                vservername = "%s-%d-%s" % (vservername,plc_counter,simplehostname)
-                #ugly hack for "vuname: vc_set_vhi_name(): Arg list too long" errors
-                if len(vservername) > 38 and preferred_hostname is not None:
-                    vservername = "%s-%d-%s" % (options.buildname,plc_counter,preferred_hostname)
-
-            # apply
-            plc['vservername']=vservername
-            plc['vserverip']=ip
-            plc['name'] = "%s_%s"%(plc['name'],simplehostname)
-            utils.header("Attaching plc %s to vserver %s (%s)"%(
-                    plc['name'],plc['vservername'],plc['vserverip']))
-            for key in [ 'PLC_DB_HOST', 'PLC_API_HOST', 'PLC_WWW_HOST', 'PLC_BOOT_HOST',]:
-                plc[key] = hostname
-    
-        return plcs
-
-    # as a plc step this should return a boolean
-    def step_pre (self,plc):
-        return self.trqemu_record (plc) and self.trqemu_make_space(plc) \
-           and self.trplc_record (plc) and self.trplc_make_space(plc)
-
-    def step_post (self,plc):
-        return True
-
-    def step_release (self,plc):
-        return self.trqemu_release(plc) and self.trplc_release(plc)
-
-    def step_release_plc (self,plc):
-        return self.trplc_release(plc) 
-
-    def step_release_qemu (self,plc):
-        return self.trqemu_release(plc) 
-
-    def step_list (self,plc):
-        return self.trqemu_list(plc) and self.trplc_list(plc)
-
-    ####################
-    def trplc_record (self,plc):
-        tracker = TrackerPlc(plc.options,instances=self.max_plcs())
-        tracker.record(plc.test_ssh.hostname,plc.vservername)
-        tracker.store()
-        return True
-
-    def trplc_release (self,plc):
-        tracker = TrackerPlc(plc.options,instances=self.max_plcs())
-        tracker.release(plc.test_ssh.hostname,plc.vservername)
-        tracker.store()
-        return True
-
-    def trplc_make_space (self,plc):
-        tracker = TrackerPlc(plc.options,instances=self.max_plcs())
-        tracker.make_space()
-        tracker.store()
-        return True
-
-    def trplc_list (self,plc):
-        TrackerPlc(plc.options,instances=self.max_plcs()).list()
-        return True
-
-    ###
-    def trqemu_record (self,plc):
-        tracker=TrackerQemu(plc.options,instances=self.max_qemus()-1)
-        for site_spec in plc.plc_spec['sites']:
-            for node_spec in site_spec['nodes']:
-                tracker.record(node_spec['host_box'],plc.options.buildname,node_spec['node_fields']['hostname'])
-        tracker.store()
-        return True
-
-    def trqemu_release (self,plc):
-        tracker=TrackerQemu(plc.options,instances=self.max_qemus()-1)
-        for site_spec in plc.plc_spec['sites']:
-            for node_spec in site_spec['nodes']:
-                tracker.release(node_spec['host_box'],plc.options.buildname,node_spec['node_fields']['hostname'])
-        tracker.store()
-        return True
-
-    def trqemu_make_space (self,plc):
-        tracker=TrackerQemu(plc.options,instances=self.max_qemus()-1)
-        for site_spec in plc.plc_spec['sites']:
-            for node_spec in site_spec['nodes']:
-                tracker.make_space()
-        tracker.store()
-        return True
-
-    def trqemu_list (self,plc):
-        TrackerQemu(plc.options,instances=self.max_qemus()-1).list()
-        return True
-
-    ###
-    def localize_rspec (self,plcs,options):
-       
-       utils.header ("Localize SFA Slice RSpec")
-
-       for plc in plcs:
-           for site in plc['sites']:
-               for node in site['nodes']:
-                   plc['sfa']['sfa_slice_rspec']['part4'] = node['node_fields']['hostname']
-            plc['sfa']['SFA_REGISTRY_HOST'] = plc['PLC_DB_HOST']
-            plc['sfa']['SFA_AGGREGATE_HOST'] = plc['PLC_DB_HOST']
-            plc['sfa']['SFA_SM_HOST'] = plc['PLC_DB_HOST']
-            plc['sfa']['SFA_PLC_DB_HOST'] = plc['PLC_DB_HOST']
-           plc['sfa']['SFA_PLC_URL'] = 'https://' + plc['PLC_API_HOST'] + ':443/PLCAPI/' 
-       
-       return plcs
index 1028a18..cae5fa4 100644 (file)
@@ -47,16 +47,18 @@ class TestSsh:
             utils.header("WARNING : something wrong in is_local_hostname with hostname=%s"%hostname)
             return False
 
-    def __init__(self,hostname,buildname=None,key=None, username=None):
+    def __init__(self,hostname,buildname=None,key=None, username=None,unknown_host=True):
         self.hostname=hostname
         self.buildname=buildname
         self.key=key
         self.username=username
+        self.unknown_host=unknown_host
 
     def is_local(self):
         return TestSsh.is_local_hostname(self.hostname)
      
-    std_options="-o BatchMode=yes -o StrictHostKeyChecking=no -o CheckHostIP=no -o ConnectTimeout=5 -o UserKnownHostsFile=/dev/null "
+    std_options="-o BatchMode=yes -o StrictHostKeyChecking=no -o CheckHostIP=no -o ConnectTimeout=5 "
+    unknown_option="-o UserKnownHostsFile=/dev/null "
     
     def key_part (self):
         if not self.key:
@@ -77,12 +79,33 @@ class TestSsh:
         if not keep_stdin:
             ssh_command += "-n "
         ssh_command += TestSsh.std_options
+        if self.unknown_host: ssh_command += TestSsh.unknown_option
         ssh_command += self.key_part()
         ssh_command += "%s %s" %(self.hostname_part(),TestSsh.backslash_shell_specials(command))
         return ssh_command
 
-    def run(self, command,background=False):
+    # same in argv form
+    def actual_argv (self, argv,keep_stdin=False):
+        if self.is_local():
+            return argv
+        ssh_argv=[]
+        ssh_argv.append('ssh')
+        if not keep_stdin: ssh_argv.append('-n')
+        ssh_argv += TestSsh.std_options.split()
+        if self.unknown_host: ssh_argv += TestSsh.unknown_option.split()
+        ssh_argv += self.key_part().split()
+        ssh_argv.append(self.hostname_part())
+        ssh_argv += argv
+        return ssh_argv
+
+    def header (self,message):
+        if not message: return
+        print "===============",message
+        sys.stdout.flush()
+
+    def run(self, command,message=None,background=False):
         local_command = self.actual_command(command)
+        self.header(message)
         return utils.system(local_command,background)
 
     def clean_dir (self,dirname):
diff --git a/system/Trackers.py b/system/Trackers.py
deleted file mode 100644 (file)
index de26b31..0000000
+++ /dev/null
@@ -1,160 +0,0 @@
-#!/usr/bin/python
-
-# Thierry Parmentelat <thierry.parmentelat@inria.fr>
-# Copyright (C) 2010 INRIA 
-#
-import os
-
-import utils
-from TestSsh import TestSsh
-
-# 2 types of trackers
-# (*) plc trackers remembers the running myplcs
-# (*) qemu trackers keeps track of the running qemu nodes
-#
-# trackers allow us to let the test run after the build has finished, 
-# and to kill/stop the oldest instances later when we need space
-# 
-
-#################### Tracker
-class Tracker:
-    
-    def __init__ (self, options,filename, instances):
-        self.options=options
-        self.filename=filename
-        self.instances=instances
-        try:
-            tracks=file(self.filename).readlines()
-            tracks = [ track.strip() for track in tracks ]
-        except:
-            tracks=[]
-        self.tracks = [track for track in tracks if track]
-
-    def list (self):
-        try:
-            contents=file(self.filename).read()
-            print "==>",self.filename,"<=="
-            print contents
-        except:
-            print "xxxxxxxxxxxx",self.filename,"not found"
-
-    def store (self):
-        out = file(self.filename,'w')
-        for track in self.tracks:
-            out.write('%s\n'%(track))
-        out.close()
-
-    def record (self,track):
-        for already in self.tracks:
-            if already==track:
-                print '%s is already included in %s'%(already,self.filename)
-                return
-        if self.options.dry_run:
-            print 'dry_run: Tracker.record - skipping %s'%(track)
-            return
-        self.tracks.append( track )
-        print "Recorded %s in tracker %s"%(track,self.filename)
-
-    # this stops the instances currently attached with this test session and release tracker
-    def release (self,track):
-        for already in self.tracks:
-            if already==track:
-                if self.options.dry_run:
-                    print 'dry_run: Tracker.release - skipping %s'%(track)
-                    return
-                self.tracks.remove(track)
-                print "Releasing %s in tracker %s"%(track,self.filename)
-                command = self.stop_command (track)
-                utils.header("Trackers.make_space track : %s"%command)
-                utils.system(command)
-        print '%s was not found in %s'%(track,self.filename)
-        return
-
-    # this actually stops the old instances, so that the total fits in the number of instances 
-    def make_space (self):
-        # number of instances to stop
-        how_many=len(self.tracks)-self.instances
-        # nothing todo until we have more than keep_vservers in the tracker
-        if how_many <= 0:
-            print 'Tracker.make_space : limit %d not reached'%self.instances
-            return
-        to_stop = self.tracks[:how_many]
-        for track in to_stop:
-            command = self.stop_command (track)
-            utils.header("Trackers.make_space track : %s"%command)
-            utils.system(command)
-        if not self.options.dry_run:
-            self.tracks = self.tracks[how_many:]
-
-    # this stops ALL known instances
-    def cleanup (self):
-        for track in self.tracks:
-            command=self.stop_command(track)
-            utils.header("Trackers.cleanup track : %s"%command)
-            utils.system(command)
-        if not self.options.dry_run:
-            self.tracks=[]
-
-class TrackerPlc (Tracker):
-    
-    DEFAULT_FILENAME=os.environ['HOME']+"/tracker-plcs"
-    # how many concurrent plcs are we keeping alive - adjust with the IP pool size
-    DEFAULT_MAX_INSTANCES = 12
-
-    def __init__ (self,options,filename=None,instances=0):
-        if not filename: filename=TrackerPlc.DEFAULT_FILENAME
-        if not instances: instances=TrackerPlc.DEFAULT_MAX_INSTANCES
-        Tracker.__init__(self,options,filename,instances)
-
-    def record (self, hostname, vservername):
-        Tracker.record (self,"%s@%s"%(hostname,vservername))
-
-    def release (self, hostname, vservername):
-        Tracker.release (self,"%s@%s"%(hostname,vservername))
-
-    def stop_command (self, track):
-        (hostname,vservername) = track.split('@')
-        return TestSsh(hostname).actual_command("vserver --silent %s stop"%vservername)
-        
-    def plcnames (self):
-        return [ self.plcname(track) for track in self.tracks ]
-
-    def plcname (self, track):
-        (hostname,vservername) = track.split('@')
-        return vservername.rsplit('-',1)[1]
-
-class TrackerQemu (Tracker):
-
-    DEFAULT_FILENAME=os.environ['HOME']+"/tracker-qemus"
-    # how many concurrent plcs are we keeping alive - adjust with the IP pool size
-    DEFAULT_MAX_INSTANCES = 3
-
-    def __init__ (self,options,filename=None,instances=0):
-        if not filename: filename=TrackerQemu.DEFAULT_FILENAME
-        if not instances: instances=TrackerQemu.DEFAULT_MAX_INSTANCES
-        Tracker.__init__(self,options,filename,instances)
-
-    def record (self, hostname, buildname, nodename):
-        Tracker.record (self,"%s@%s@%s"%(hostname,buildname,nodename))
-
-    def release (self, hostname, buildname, nodename):
-        Tracker.release (self,"%s@%s@%s"%(hostname,buildname,nodename))
-
-    def stop_command (self, track):
-        (hostname,buildname,nodename) = track.split('@')
-        return TestSsh(hostname).actual_command("%s/qemu-%s/qemu-kill-node this"%(buildname,nodename))
-
-    def hostnames (self):
-        return [ self.hostname(track) for track in self.tracks ]
-
-    def hostname (self, track):
-        (hostname,buildname,nodename) = track.split('@')
-        return hostname
-
-    def nodenames (self):
-        return [ self.nodename(track) for track in self.tracks ]
-        
-    def nodename (self, track):
-        (hostname,buildname,nodename) = track.split('@')
-        return nodename
-
index df41bab..16a4e90 100644 (file)
@@ -260,6 +260,7 @@ def leases (options, index):
 
 def plc (options,index) :
     return { 
+        'index' : index,
         'name' : 'onetest%d'%index,
         # as of yet, not sure we can handle foreign hosts, but this is required though
         'hostname' : 'deferred-myplc-hostbox-%d'%index,