import repository from arizona
[raven.git] / lib / ravenlib / files / tpparse.py
1 import base64
2 import hashlib
3 import os
4 import time
5 import xml.parsers.expat
6
def make_short_hash(x):
    """Return the SHA-1 hex digest of the bytes base64-encoded in ``x``.

    ``x`` is a base64 string in which every "/" has been written as "_"
    and whose trailing "=" padding may be missing. The "_" substitution
    is undone, then decoding is attempted with zero, one, two and finally
    three "=" characters appended; the first attempt that decodes wins.

    Returns:
        The 40-character hexadecimal SHA-1 digest of the decoded bytes.

    Raises:
        binascii.Error: if the string cannot be decoded even with three
            padding characters appended.
    """
    # Imported locally so the function does not rely on the private
    # ``base64.binascii`` attribute (or on a module-level import that the
    # file does not have).
    import binascii

    encoded = x.replace("_", "/")

    # a2b_base64 is the primitive that base64.decodestring delegated to; it
    # accepts ASCII text on both Python 2 and Python 3.
    for padding in ("", "=", "=="):
        try:
            decoded = binascii.a2b_base64(encoded + padding)
            break
        except binascii.Error:
            continue
    else:
        # Last resort: let any failure propagate to the caller.
        decoded = binascii.a2b_base64(encoded + "===")

    return hashlib.sha1(decoded).hexdigest()
20
def get_tpfile_filenames(username, publickey_string):
    """Build the candidate tpfile names for a user / public-key pair.

    The key is normalised by stripping trailing "=" characters and writing
    every "/" as "_". Two names are returned, in this order:

      1. ``<username>.<short hash of the normalised key>.tpfile``
      2. ``<username>.<normalised key>.tpfile``

    Returns:
        A two-element list of file names (no directory component).
    """
    normalised_key = publickey_string.rstrip("=").replace("/", "_")
    short_name = ".".join([username, make_short_hash(normalised_key), "tpfile"])
    long_name = ".".join([username, normalised_key, "tpfile"])
    return [short_name, long_name]
28
class xmlparse:
    """Thin wrapper around an expat parser, meant to be subclassed.

    Subclasses override ``handle_start_tag``, ``handle_end_tag`` and
    ``handle_data``; the implementations here do nothing.

    Attributes:
        filename: path given to ``setFileName``, or None.
        dirname: directory of ``filename`` once a file name is set,
            otherwise the ``dir`` argument passed to the constructor.
        parser: the expat parser created by ``initParser``.
    """

    def __init__(self, fn=None, contents=None, dir=None):
        """Create the parser and parse immediately if given input.

        Args:
            fn: path of a file to parse; takes precedence over ``contents``.
            contents: XML text to parse when ``fn`` is not given.
            dir: value for ``dirname`` when no file name is set.
        """
        self.filename = None
        self.dirname = dir

        if fn:
            self.setFileName(fn)
            self.parseFile()
        elif contents:
            self.parseString(contents)

    def setFileName(self, fn):
        """Record ``fn`` and set ``dirname`` to its absolute directory."""
        self.filename = fn
        self.dirname = os.path.dirname(os.path.abspath(self.filename))

    def parseFile(self):
        """Parse the file named by ``self.filename``.

        Prints a "parse: <basename>" progress line first. Errors raised by
        ``open`` or by expat propagate to the caller.
        """
        print("parse: %s" % os.path.basename(self.filename))

        self.initParser()
        # Binary mode: expat does its own decoding and expects raw bytes.
        f = open(self.filename, "rb")
        try:
            self.parser.ParseFile(f)
        finally:
            # Close the handle even when expat raises on malformed input.
            f.close()

    def parseString(self, contents):
        """Parse XML supplied as a string."""
        self.initParser()
        # NOTE(review): Parse is called without the isfinal flag, so expat is
        # never told the document is complete; a truncated document may go
        # unreported. Left unchanged to preserve existing behavior.
        self.parser.Parse(contents)

    def initParser(self):
        """Create a fresh expat parser wired to this object's handlers."""
        self.parser = xml.parsers.expat.ParserCreate()
        self.parser.StartElementHandler = self.handle_start_tag
        self.parser.EndElementHandler = self.handle_end_tag
        self.parser.CharacterDataHandler = self.handle_data

    def handle_data(self, data):
        """Character-data callback; no-op in the base class."""
        pass

    def handle_start_tag(self, tag, attrs):
        """Start-element callback; no-op in the base class."""
        pass

    def handle_end_tag(self, tag):
        """End-element callback; no-op in the base class."""
        pass
70
class tpparse(xmlparse):
    """Parser that turns a tpfile into a list of entry dictionaries.

    The document kind is fixed by the first SIGNED_FILE or TRUSTEDPACKAGES
    tag encountered:

      * SIGNED_FILE: the character data of its FILE element is parsed as a
        nested document by a new ``tpparse`` and that parser's entries
        replace this parser's entries.
      * TRUSTEDPACKAGES: PACKAGE / FILE tags become "PACKAGE" entries and
        USER tags become "USER" entries whose ``children`` are the entries
        of the referenced user's tpfile.

    Attributes:
        entries: list of entry dicts built while parsing.
        kind: "UNKNOWN", "SIGNED_FILE" or "TRUSTEDPACKAGES".
        in_signed_file: True while inside the FILE element of a signed file.
        data_list: character-data chunks collected since the last start tag.
    """

    def __init__(self, fn=None, contents=None, dir=None):
        """Initialise parser state, then parse ``fn`` or ``contents`` if given."""
        # State must exist before xmlparse.__init__, which may parse at once
        # and invoke the handlers below.
        self.entries = []
        self.kind = "UNKNOWN"
        self.in_signed_file = False
        self.data_list = []
        xmlparse.__init__(self, fn, contents, dir)

    def handle_start_tag(self, tag, attrs):
        """Classify the document on first sight and dispatch known tags."""
        # Every start tag discards previously collected character data.
        self.data_list = []

        if self.kind == "UNKNOWN":
            if tag == "SIGNED_FILE":
                self.kind = "SIGNED_FILE"
            elif tag == "TRUSTEDPACKAGES":
                self.kind = "TRUSTEDPACKAGES"

        if self.kind == "SIGNED_FILE":
            if tag == "FILE":
                self.in_signed_file = True

        if self.kind == "TRUSTEDPACKAGES":
            if tag == "TRUSTEDPACKAGES":
                pass
            elif tag in ("PACKAGE", "FILE"):
                self.handle_package(attrs)
            elif tag == "USER":
                self.handle_user(attrs)
            else:
                print("unknown tag kind: %s" % (tag,))

    def handle_end_tag(self, tag):
        """At the end of a signed FILE element, parse its collected text."""
        if self.in_signed_file and tag == "FILE":
            # joining a list of strings is much faster than repeated
            # concatenations
            data = "".join(self.data_list)

            subparser = tpparse(dir=self.dirname, contents=data)
            self.entries = subparser.entries
            self.in_signed_file = False

    def handle_data(self, data):
        """Collect a chunk of character data."""
        self.data_list.append(data)

    def handle_package(self, attrs):
        """Append a "PACKAGE" entry built from the tag's attributes.

        PATTERN, HASH, TIMESTAMP and PROVIDES are stored as given (None when
        absent); ACTION is lower-cased and stripped, defaulting to
        "unspecified"; TAGS is split by ``parse_tags``.
        """
        tags, mantags = self.parse_tags(attrs.get("TAGS", ""))

        tpentry = {
            'kind': "PACKAGE",
            'pattern': attrs.get("PATTERN"),
            'hash': attrs.get("HASH"),
            'action': attrs.get("ACTION", "unspecified").lower().strip(),
            'timestamp': attrs.get("TIMESTAMP"),
            'provides': attrs.get("PROVIDES", None),
            'tags': tags,
            'mantags': mantags,
        }

        self.entries.append(tpentry)

    def handle_user(self, attrs):
        """Append a "USER" entry, parsing the user's own tpfile for children.

        The tpfile is looked up in ``self.dirname`` under the names returned
        by ``get_tpfile_filenames``; the first one that exists is used. If
        none exists (or no directory is known) a message is printed and no
        entry is added.
        """
        username = str(attrs.get("USERNAME"))
        publickey_string = str(attrs.get("PUBLICKEY"))

        filenames = get_tpfile_filenames(username, publickey_string)

        # XXX - we really ought to sort these by timestamp

        filename = None
        pathname = None
        # Without a directory there is nowhere to look; fall through to the
        # "not found" report instead of failing inside os.path.join.
        if self.dirname is not None:
            for candidate in filenames:
                candidate_path = os.path.join(self.dirname, candidate)
                if os.path.exists(candidate_path):
                    filename = candidate
                    pathname = candidate_path
                    break

        if not filename:
            print("tpparse: failed to find any of: %s in: %s"
                  % (filenames, self.dirname))
            return

        # NOTE(review): nothing here guards against tpfiles whose USER tags
        # reference each other in a cycle.
        child_parser = tpparse(pathname)
        child_entries = child_parser.entries

        requiretags = attrs.get("REQUIRETAGS", "").lower().strip().split(",")
        requiretags = [tag for tag in requiretags if tag != '']

        tpentry = {
            'username': username,
            'kind': "USER",
            'pattern': attrs.get("PATTERN"),
            'tpfilename': filename,
            'action': attrs.get("ACTION", "unspecified").lower().strip(),
            'order-by': attrs.get("ORDER-BY", "default").lower().strip(),
            'requiretags': requiretags,
            'provides': attrs.get("PROVIDES", None),
            'children': child_entries,
        }

        self.entries.append(tpentry)

    def parse_tags(self, s):
        """Split a comma-separated tag string into (tags, mantags).

        The string is lower-cased and stripped as a whole before splitting.
        A tag written with a leading "+" is mandatory: it is listed, without
        the "+", in both ``tags`` and ``mantags``. Empty items (for example
        from an empty string) are dropped, matching how REQUIRETAGS is
        handled in ``handle_user``.

        Returns:
            A ``(tags, mantags)`` tuple of lists.
        """
        tags = []
        mantags = []
        # the tags are a comma-separated list.
        for raw in s.lower().strip().split(","):
            # if a tag starts with a "+", then it's a mandatory tag
            mandatory = raw[:1] == '+'
            if mandatory:
                tag = raw[1:]
            else:
                tag = raw
            if not tag:
                continue
            if mandatory:
                mantags.append(tag)
            tags.append(tag)
        return (tags, mantags)

    def dump(self, indent, entries=None):
        """Print ``entries`` (default: this parser's) as an indented tree.

        Each line is ``indent`` spaces, one more space, then the entry's
        kind, action and pattern (plus username for USER entries). Children
        are printed one level deeper.
        """
        if entries is None:
            entries = self.entries

        for entry in entries:
            if entry["kind"] == "USER":
                text = "%s %s %s %s" % (entry["kind"], entry["action"],
                                        entry["pattern"], entry["username"])
            else:
                text = "%s %s %s" % (entry["kind"], entry["action"],
                                     entry["pattern"])
            print("%s %s" % (" " * indent, text))
            if "children" in entry:
                self.dump(indent + 1, entry["children"])

    def get_all_entries(self, entries=None):
        """Return ``entries`` (default: this parser's) flattened depth-first.

        Each entry is followed immediately by all of its descendants.
        """
        all_entries = []

        if entries is None:
            entries = self.entries

        for entry in entries:
            all_entries.append(entry)
            if "children" in entry:
                all_entries.extend(self.get_all_entries(entry["children"]))

        return all_entries
208
if __name__ == "__main__":
    # Manual smoke test: parse a tpfile and print its entry tree. The file
    # may be given as the first command-line argument; with no argument the
    # original hard-coded sample path is used.
    import sys

    default_tpfile = "/usr/local/stork/var/packageinfo/stork-repository.cs.arizona.edu/packageinfo/tpfiles/digdugdemo.fda12c53d8a54531879fae38fcb0671efb553051.tpfile"
    if len(sys.argv) > 1:
        tpfile = sys.argv[1]
    else:
        tpfile = default_tpfile

    parser = tpparse(fn=tpfile)
    parser.dump(0)