1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 import sys
24 import os
25 import re
26 from HTMLParser import HTMLParser
27
# Prefer the top-level helper modules; when they cannot be imported, fall
# back to the package-qualified ones.  Only ImportError triggers the
# fallback so that any other failure raised while importing is not hidden.
try:
    import util
    import config
except ImportError:
    from sabayon import util
    from sabayon import config
34
# When true, the parser prints a trace line for each bookmark/folder it builds.
debug = 0
# Unit string repeated once per nesting level in printed output.
indent = ' '

# NOTE(review): presumably the default string used to join folder names into
# a path -- its use is not visible in this part of the file; confirm.
bookmark_separator = "/"

# Kinds of event handed to a traversal visitor callback.
TYPE_FOLDER, TYPE_BOOKMARK, TYPE_FOLDER_END = 1, 2, 3
42
# Per-tag parsing rules, keyed by lower-case tag name.
#
#   implicit_close_event -- which events ('begin' and/or 'end') of this tag
#                           trigger implicit closing of other open tags
#   implicit_close_scope -- tags that bound the search for tags to close
#   implicit_close_tags  -- open tags that get closed implicitly
#   simple_tag           -- true for tags that are never pushed on the
#                           parser's tag stack
tag_info_dict = dict(
    dt=dict(
        implicit_close_event=['begin'],
        implicit_close_scope=['dl'],
        implicit_close_tags=['dt', 'dd'],
    ),
    dd=dict(
        implicit_close_event=['begin'],
        implicit_close_scope=['dl'],
        implicit_close_tags=['dd'],
    ),
    dl=dict(
        implicit_close_event=['begin', 'end'],
        implicit_close_scope=['dl'],
        implicit_close_tags=['dt', 'dd'],
    ),
    p=dict(simple_tag=True),
    hr=dict(simple_tag=True),
)
56
57
# Logging category flags.  Each constant is a distinct single bit, so the
# categories can be OR-ed together into one mask.
LOG_OPERATION = 1 << 0
LOG_CHANGE = 1 << 1
LOG_IGNORED_CHANGE = 1 << 2
LOG_APPLY = 1 << 3
LOG_SYNC = 1 << 4
LOG_PARSE = 1 << 5
LOG_PREF = 1 << 6
LOG_FILE_CONTENTS = 1 << 7
LOG_DATA = 1 << 8
LOG_VERBOSE = 1 << 16
68
74
77 self.folder = folder
78 self.name = name
79 self.attrs = {}
80
82 return self.attrs.get(name, None)
83
85 return self.attrs.get("href", None)
86
91
99
102
105 self.reset(name, parent)
106
def reset(self, name, parent):
    """Re-initialise this folder in place.

    Stores the given name and parent and replaces the attribute
    dictionary and the entry list with fresh, empty containers.
    """
    self.attrs, self.entries = {}, []
    self.parent = parent
    self.name = name
112
def entry_index(self, entry):
    """Return the position of the first entry equal to `entry`, or None.

    Entries are compared with ==, in list order.
    """
    for position, candidate in enumerate(self.entries):
        if candidate == entry:
            return position
    return None
121
def add_entry(self, entry):
    """Append `entry` to this folder's entry list and return it unchanged."""
    children = self.entries
    children.append(entry)
    return entry
125
131
133 for entry in self.entries:
134 if isinstance(entry, BookmarkFolder):
135 if entry == folder:
136 return entry
137 return None
138
140 if not isinstance(bookmark, Bookmark):
141 bookmark = Bookmark(self, bookmark)
142 self.entries.append(bookmark)
143 return bookmark
144
145
147 for entry in self.entries:
148 if isinstance(entry, Bookmark):
149 if entry == bookmark:
150 return entry
151 return None
152
167
168
def add_path_entry(self, path, entry):
    """Ensure the last element of `path` is an entry under its parent folders.

    Every element of `path` except the last is looked up as a sub-folder,
    starting from this folder, and created with add_folder() when the
    lookup yields nothing.  The last element is then appended to the
    innermost folder unless entry_index() already finds it there.

    NOTE(review): `entry` is accepted but never used -- the value that is
    stored is path[-1].  Confirm against callers whether that is intended.

    Raises IndexError when `path` is empty.
    """
    folder = self
    for component in path[:-1]:
        folder = folder.lookup_folder(component) or folder.add_folder(component)

    leaf = path[-1]
    if folder.entry_index(leaf) is None:
        folder.add_entry(leaf)
181
182
184 self.attrs[name] = value
185
187 return self.attrs.get(name, None)
188
190 return self.attrs.get("href", None)
191
193 path = [self]
194 folder = self
195 parent = self.parent
196 while parent:
197 path.append(parent)
198 parent = parent.parent
199 path.reverse()
200 return path
201
203 path = self.path()
204 path = [ p.name for p in path ]
205 if join == None:
206 return path
207 else:
208 return join.join(path)
209
212
def _traverse(self, visit_func, path, data):
    """Depth-first walk of this folder, reporting each entry to `visit_func`.

    visit_func is called as visit_func(entry, kind, path, data):
      * TYPE_FOLDER for each sub-folder, just before descending into it
      * TYPE_BOOKMARK for each bookmark
      * TYPE_FOLDER_END for this folder once all of its entries are done

    `path` is a list used as a stack: this folder is pushed while its
    entries are visited and popped again before the TYPE_FOLDER_END call.

    Raises ValueError for an entry that is neither a folder nor a bookmark.
    """
    assert isinstance(self, BookmarkFolder)

    path.append(self)
    for child in self.entries:
        if isinstance(child, BookmarkFolder):
            visit_func(child, TYPE_FOLDER, path, data)
            child._traverse(visit_func, path, data)
            continue
        if not isinstance(child, Bookmark):
            raise ValueError
        visit_func(child, TYPE_BOOKMARK, path, data)
    path.pop()
    visit_func(self, TYPE_FOLDER_END, path, data)
227
228 - def traverse(self, visit_func, data=None):
231
232
234 result = []
235
236 def visit(entry, type, path, data):
237 if type == TYPE_BOOKMARK:
238 if entry.name == name:
239 result.append(entry)
240
241 self.traverse(visit)
242 return result
243
244
245
248 self.tag = tag
249 self.attrs = {}
250 self.data = ""
251
254 HTMLParser.__init__(self)
255 self.stack = [HTMLTag("None")]
256 self.folder_root = root
257 self.cur_folder = self.folder_root
258
260 self.folder_root = root
261
263 return self.folder_root
264
266 return "%s" % [ s.tag for s in self.stack ]
267
269 i = len(self.stack) - 1
270 while i >= 0:
271 if self.stack[i].tag == tag:
272 return self.stack[i]
273 i -= 1
274 return None
275
277
278 tag_info = tag_info_dict.get(tag, None)
279 if not tag_info:
280 return
281
282 implicit_close_event = tag_info.get('implicit_close_event', None)
283 if not implicit_close_event or not event in implicit_close_event:
284 return
285
286 implicit_close_scope = tag_info.get('implicit_close_scope', None)
287 implicit_close_tags = tag_info.get('implicit_close_tags', None)
288 if not (implicit_close_scope or implicit_close_tags):
289 return
290
291 scope_index = len(self.stack) - 1
292 while scope_index >= 0:
293 if self.stack[scope_index].tag in implicit_close_scope:
294 break
295 scope_index = scope_index - 1
296
297 i = scope_index + 1
298 while i < len(self.stack):
299 if self.stack[i].tag in implicit_close_tags:
300 break
301 i = i + 1
302
303 j = len(self.stack) - 1
304 while (j >= i):
305 self._handle_endtag(self.stack[j].tag)
306 j = j - 1
307
308
310 self.implicit_close('begin', tag)
311
312 tag_info = tag_info_dict.get(tag, None)
313 if not tag_info:
314 simple_tag = False
315 else:
316 simple_tag = tag_info.get('simple_tag', False)
317 if not simple_tag:
318 top = HTMLTag(tag)
319 for attr, value in attrs:
320 top.attrs[attr] = value
321 self.stack.append(top)
322
324 top = self.stack.pop();
325 if tag == "a":
326 bookmark = self.cur_folder.add_bookmark(top.data)
327 for attr, value in top.attrs.items():
328 bookmark.attrs[attr] = value
329 if debug:
330 print "%sBookmark %s" % (indent*(len(self.cur_folder.path())),top.data)
331
332 elif top.tag == 'h3' or top.tag == 'h1':
333
334
335
336
337
338
339
340
341
342
343 if top.tag == 'h3':
344 if self.cur_folder:
345 self.cur_folder = self.cur_folder.add_folder(top.data)
346 else:
347 self.cur_folder = self.folder_root
348 else:
349
350 self.folder_root.reset(top.data, None)
351 self.cur_folder = self.folder_root
352 for attr, value in top.attrs.items():
353 self.cur_folder.attrs[attr] = value
354 if debug:
355 print "%sPUSH Folder %s" % (indent*(len(self.cur_folder.path())-1),self.cur_folder.name)
356 elif top.tag == 'dl':
357
358
359 if debug:
360 print "%sPOP Folder %s" % (indent*(len(self.cur_folder.path())-1),self.cur_folder.name)
361 self.cur_folder = self.cur_folder.parent
362 else:
363 pass
364
365
366
367
372
374 tag = self.stack[-1]
375 data = data.strip()
376 tag.data = tag.data + data
377
378
def visit(entry, type, path, data=None):
    """Traversal callback that prints an entry followed by its attributes.

    entry -- the folder or bookmark being visited; its name and attrs are
             read, and path_as_string() is called for folders
    type  -- TYPE_FOLDER, TYPE_BOOKMARK or TYPE_FOLDER_END
    path  -- list of enclosing folders; its length sets the indent depth
    data  -- caller-supplied value, shown in parentheses on folder lines

    Nothing is printed for the entry itself on TYPE_FOLDER_END, but its
    attributes are still listed.  Raises ValueError for any other type.
    """
    max_len = 80    # names and attribute values are cut to this many characters
    level = len(path) - 1

    # print(...) with a single parenthesised argument produces the same
    # output as the print statement.
    if type == TYPE_FOLDER:
        print("%sFolder: %s(%s) path = [%s]" % (indent * level,
                                                entry.name[0:max_len],
                                                data, entry.path_as_string()))
    elif type == TYPE_BOOKMARK:
        print("%sBookmark: %s" % (indent * level, entry.name[0:max_len]))
    elif type == TYPE_FOLDER_END:
        pass
    else:
        raise ValueError

    for attr, value in entry.attrs.items():
        # HTMLParser reports an attribute written without a value as None,
        # which cannot be sliced; show it untruncated instead of raising.
        if value is not None:
            value = value[0:max_len]
        print("%sAttr: %s = %s" % (indent * (level + 1), attr, value))
396
397
398
if __name__ == "__main__":
    # Manual smoke test: parse ./bookmarks.html into a folder tree rooted at 'bm'.
    bm_root = BookmarkFolder('bm', None)
    bm_file = BookmarkHTMLParser()
    bm_file.set_root(bm_root)
    # Close the input file deterministically instead of leaking the handle.
    with open('bookmarks.html') as bookmarks_html:
        bm_file.feed(bookmarks_html.read())
    bm_file.close()
405