Package sabayon :: Module mozilla_bookmarks
[hide private]
[frames] | no frames]

Source Code for Module sabayon.mozilla_bookmarks

  1  # 
  2  # Copyright (C) 2005 Red Hat, Inc. 
  3  # 
  4  # This program is free software; you can redistribute it and/or modify 
  5  # it under the terms of the GNU General Public License as published by 
  6  # the Free Software Foundation; either version 2 of the License, or 
  7  # (at your option) any later version. 
  8  # 
  9  # This program is distributed in the hope that it will be useful, 
 10  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 11  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 12  # GNU General Public License for more details. 
 13  # 
 14  # You should have received a copy of the GNU General Public License 
 15  # along with this program; if not, write to the Free Software 
 16  # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
 17  # 
 18   
 19  # XXX - TODO: 
 20  # add support for DD tags 
 21  # add support for HR format tags 
 22   
 23  import sys 
 24  import os 
 25  import re 
 26  from HTMLParser import HTMLParser 
 27   
 28  try: 
 29      import util 
 30      import config 
 31  except: 
 32      from sabayon import util 
 33      from sabayon import config 
 34   
 35  debug = 0 
 36  indent = '    ' 
 37   
 38  bookmark_separator = "/" 
 39  TYPE_FOLDER     = 1 
 40  TYPE_BOOKMARK   = 2 
 41  TYPE_FOLDER_END = 3 
 42   
 43  tag_info_dict = { 
 44      'dt' : {'implicit_close_event' : ['begin'], 
 45              'implicit_close_scope' : ['dl'], 
 46              'implicit_close_tags'  : ['dt', 'dd']}, 
 47      'dd' : {'implicit_close_event' : ['begin'], 
 48              'implicit_close_scope' : ['dl'], 
 49              'implicit_close_tags'  : ['dd']}, 
 50      'dl' : {'implicit_close_event' : ['begin', 'end'], 
 51              'implicit_close_scope' : ['dl'], 
 52              'implicit_close_tags'  : ['dt', 'dd']}, 
 53      'p'  : {'simple_tag'           : True}, 
 54      'hr' : {'simple_tag'           : True}, 
 55  } 
 56   
 57  # FIXME: these should be defined one place; see mozillasource.py 
 58  LOG_OPERATION           = 0x00001 
 59  LOG_CHANGE              = 0x00002 
 60  LOG_IGNORED_CHANGE      = 0x00004 
 61  LOG_APPLY               = 0x00008 
 62  LOG_SYNC                = 0x00010 
 63  LOG_PARSE               = 0x00020 
 64  LOG_PREF                = 0x00040 
 65  LOG_FILE_CONTENTS       = 0x00080 
 66  LOG_DATA                = 0x00100 
 67  LOG_VERBOSE             = 0x10000 
 68   
def dprint(mask, fmt, *args):
    """Write a %-formatted message to the Mozilla-source debug log.

    mask -- LOG_* category bits; accepted but currently ignored
    fmt  -- %-style format string
    args -- values interpolated into fmt
    """
    # FIXME: before debuglog was introduced, we could use the mask to filter
    # which messages to log.  Now we don't use it anymore.  Is it still useful?
    # If you change this, synchronize it with mozillasource.py
    # NOTE(review): debuglog is not among the imports visible in this
    # listing -- confirm it is in scope before relying on this function.
    log = debuglog.debug_log
    domain = debuglog.DEBUG_LOG_DOMAIN_MOZILLA_SOURCE
    log(False, domain, fmt % args)
74
class Bookmark:
    """A single bookmark stored inside a folder.

    Attributes:
      folder -- the containing folder object; it must provide path() and
                path_as_names()
      name   -- the bookmark's name
      attrs  -- dictionary of attributes; "href" holds the URL
    """

    def __init__(self, folder, name):
        self.folder = folder
        self.name = name
        self.attrs = {}

    def get_attr(self, name):
        """Return the attribute called name, or None when it is not set."""
        return self.attrs.get(name, None)

    def get_url(self):
        """Return the "href" attribute, or None when it is not set."""
        return self.attrs.get("href", None)

    def path(self):
        """Return the containing folder's path list with this bookmark
        appended as the last element."""
        path = self.folder.path()
        path.append(self)
        return path

    def path_as_names(self, join=None):
        """Return the path as names instead of objects.

        With join left as None the result is a list of names; otherwise
        join is used as the separator string and one string is returned.
        """
        names = self.folder.path_as_names()
        names.append(self.name)
        if join is None:
            return names
        return join.join(names)

    def path_as_string(self):
        """Return the path names joined with the module's bookmark_separator."""
        # NOTE(review): the body of this method was missing from the listing
        # under review; it is reconstructed from path_as_names() and the
        # otherwise unused bookmark_separator -- confirm against upstream.
        return self.path_as_names(bookmark_separator)
102
class BookmarkFolder:
    """A folder holding an ordered list of bookmarks and sub-folders.

    Attributes:
      name    -- the folder's name
      parent  -- the enclosing BookmarkFolder, or None for a root folder
      attrs   -- dictionary of folder attributes
      entries -- list of contained entries, in insertion order
    """

    def __init__(self, name, parent):
        self.reset(name, parent)

    def reset(self, name, parent):
        """(Re)initialise the folder, discarding all attributes and entries."""
        self.name = name
        self.parent = parent
        self.attrs = {}
        self.entries = []

    def entry_index(self, entry):
        """Return the index of the first entry equal (==) to entry, or None."""
        for index, candidate in enumerate(self.entries):
            if candidate == entry:
                return index
        return None

    def add_entry(self, entry):
        """Append entry unchanged and return it."""
        self.entries.append(entry)
        return entry

    def add_folder(self, folder):
        """Append a sub-folder and return it.

        A BookmarkFolder instance is appended as given; any other value is
        taken as the name of a new folder whose parent is this folder.
        """
        if not isinstance(folder, BookmarkFolder):
            folder = BookmarkFolder(folder, self)
        self.entries.append(folder)
        return folder

    def lookup_folder(self, folder):
        """Return the direct sub-folder equal (==) to folder, or None."""
        for entry in self.entries:
            if isinstance(entry, BookmarkFolder) and entry == folder:
                return entry
        return None

    def add_bookmark(self, bookmark):
        """Append a bookmark and return it.

        A Bookmark instance is appended as given; any other value is taken
        as the name of a new Bookmark belonging to this folder.
        """
        if not isinstance(bookmark, Bookmark):
            bookmark = Bookmark(self, bookmark)
        self.entries.append(bookmark)
        return bookmark

    def lookup_bookmark(self, bookmark):
        """Return the direct bookmark equal (==) to bookmark, or None."""
        for entry in self.entries:
            if isinstance(entry, Bookmark) and entry == bookmark:
                return entry
        return None

    def lookup_path(self, path):
        """Follow path downwards from this folder and return the final entry.

        Every element but the last is resolved with lookup_folder(); the
        last is located with entry_index().  Returns None when any step
        fails or when path is empty.
        """
        last = len(path) - 1
        if last < 0:
            # An empty path names nothing (indexing it used to raise).
            return None
        folder = self
        for i in range(last):
            folder = folder.lookup_folder(path[i])
            if not folder:
                return None
        index = folder.entry_index(path[last])
        if index is None:
            return None
        return folder.entries[index]

    def add_path_entry(self, path, entry):
        """Make sure every element of path exists below this folder.

        Missing intermediate folders are created with add_folder(); the
        final element of path is added with add_entry() unless already
        present.  The entry argument is accepted but not used: the final
        element of path is what gets added.  An empty path is a no-op.
        """
        last = len(path) - 1
        if last < 0:
            return
        folder = self
        for i in range(last):
            child = folder.lookup_folder(path[i])
            if not child:
                child = folder.add_folder(path[i])
            folder = child
        if folder.entry_index(path[last]) is None:
            folder.add_entry(path[last])

    def set_attr(self, name, value):
        self.attrs[name] = value

    def get_attr(self, name):
        """Return the attribute called name, or None when it is not set."""
        return self.attrs.get(name, None)

    def get_url(self):
        """Return the "href" attribute, or None when it is not set."""
        return self.attrs.get("href", None)

    def path(self):
        """Return the list of folders from the topmost ancestor down to,
        and including, this folder."""
        path = [self]
        parent = self.parent
        while parent:
            path.append(parent)
            parent = parent.parent
        path.reverse()
        return path

    def path_as_names(self, join=None):
        """Return the path as names instead of objects.

        With join left as None the result is a list of names; otherwise
        join is used as the separator string and one string is returned.
        """
        names = [p.name for p in self.path()]
        if join is None:
            return names
        return join.join(names)

    def path_as_string(self):
        """Return the path names joined with the module's bookmark_separator."""
        # NOTE(review): the body of this method was missing from the listing
        # under review; it is reconstructed from path_as_names() and the
        # otherwise unused bookmark_separator -- confirm against upstream.
        return self.path_as_names(bookmark_separator)

    def _traverse(self, visit_func, path, data):
        """Depth-first walk used by traverse().

        path is the list of folders currently being walked; this folder is
        pushed while its entries are visited and popped again before the
        TYPE_FOLDER_END callback for this folder is made.
        """
        assert isinstance(self, BookmarkFolder)

        path.append(self)
        for entry in self.entries:
            if isinstance(entry, BookmarkFolder):
                visit_func(entry, TYPE_FOLDER, path, data)
                entry._traverse(visit_func, path, data)
            elif isinstance(entry, Bookmark):
                visit_func(entry, TYPE_BOOKMARK, path, data)
            else:
                raise ValueError("unexpected entry in folder: %r" % (entry,))
        path.pop()
        visit_func(self, TYPE_FOLDER_END, path, data)

    def traverse(self, visit_func, data=None):
        """Walk the tree below this folder depth-first.

        visit_func(entry, type, path, data) is called with TYPE_FOLDER when
        a sub-folder is entered, TYPE_BOOKMARK for each bookmark, and
        TYPE_FOLDER_END once a folder (this one included) has been walked.
        """
        self._traverse(visit_func, [], data)

    def find_bookmark(self, name):
        """Return a list of all bookmarks below this folder named name."""
        result = []

        def visit(entry, type, path, data):
            if type == TYPE_BOOKMARK and entry.name == name:
                result.append(entry)

        self.traverse(visit)
        return result
243 244 # ---------------------------------- 245
class HTMLTag:
    """Record of one HTML element: its tag name, its attributes and the
    text data collected for it."""

    def __init__(self, tag):
        # Each instance gets its own attribute dictionary and starts
        # with no text.
        self.tag, self.attrs, self.data = tag, {}, ""
251
class BookmarkHTMLParser(HTMLParser):
    """HTMLParser subclass that builds a bookmark folder tree.

    Open elements are tracked on self.stack (a list of HTMLTag objects whose
    bottom element is a sentinel tagged "None").  Closing an <a> adds a
    bookmark to the currently open folder, closing an <h3> opens a new
    sub-folder, closing an <h1> names the root folder, and closing a <dl>
    returns to the parent folder.
    """

    def __init__(self, root=None):
        HTMLParser.__init__(self)
        self.stack = [HTMLTag("None")]
        self.folder_root = root
        self.cur_folder = self.folder_root

    def set_root(self, root):
        self.folder_root = root

    def get_root(self):
        return self.folder_root

    def stack_to_string(self):
        """Return the tag names on the stack, bottom first, as a string."""
        return "%s" % [s.tag for s in self.stack]

    def find_tag_on_stack(self, tag):
        """Return the innermost open element named tag, or None."""
        for i in range(len(self.stack) - 1, -1, -1):
            if self.stack[i].tag == tag:
                return self.stack[i]
        return None

    def implicit_close(self, event, tag):
        """Close open elements that the given event of tag implicitly ends.

        event is 'begin' or 'end'.  The rules come from tag_info_dict: find
        the innermost open element listed in 'implicit_close_scope', then
        close everything from the first element after it that is listed in
        'implicit_close_tags' up to the top of the stack.
        """
        tag_info = tag_info_dict.get(tag, None)
        if not tag_info:
            return

        implicit_close_event = tag_info.get('implicit_close_event', None)
        if not implicit_close_event or event not in implicit_close_event:
            return

        scope_tags = tag_info.get('implicit_close_scope', None)
        closable_tags = tag_info.get('implicit_close_tags', None)
        # Both lists are needed below; with only one present the membership
        # tests would be run against None.
        if not (scope_tags and closable_tags):
            return

        scope_index = len(self.stack) - 1
        while scope_index >= 0 and self.stack[scope_index].tag not in scope_tags:
            scope_index -= 1

        first = scope_index + 1
        while first < len(self.stack) and self.stack[first].tag not in closable_tags:
            first += 1

        # Each _handle_endtag() call pops exactly one element.
        while len(self.stack) > first:
            self._handle_endtag(self.stack[-1].tag)

    def handle_starttag(self, tag, attrs):
        """Push a new element, unless tag is marked 'simple_tag'."""
        self.implicit_close('begin', tag)

        tag_info = tag_info_dict.get(tag, None)
        if tag_info and tag_info.get('simple_tag', False):
            return
        top = HTMLTag(tag)
        for attr, value in attrs:
            top.attrs[attr] = value
        self.stack.append(top)

    def _handle_endtag(self, tag):
        """Pop the top element and apply its effect on the folder tree."""
        if not self.stack:
            return
        top = self.stack.pop()
        if tag == "a":
            folder = self.cur_folder
            if folder is None:
                # No folder has been opened yet; file the bookmark under
                # the root if there is one.
                folder = self.folder_root
            if folder is None:
                return
            bookmark = folder.add_bookmark(top.data)
            for attr, value in top.attrs.items():
                bookmark.attrs[attr] = value
            if debug:
                print("%sBookmark %s" % (indent*(len(folder.path())),top.data))

        elif top.tag == 'h3' or top.tag == 'h1':
            # Folders are contained in a <DT><H3 attrs>name</H3> sequence
            # Note, this is currently the only use of the H3 tag in a bookmark
            # file so rather than looking for the aforementioned sequence an
            # easy "hack" is to just look for an H3 tag, its attrs, and its
            # data will be the folder name. Note <H1> is reserved for the
            # root folder.
            #
            # Since this is a new folder, we add it as a folder to the
            # currently open folder, it is effectively a push of the folder
            # stack, but we maintain it as simply the currently open folder.
            if top.tag == 'h3':
                if self.cur_folder:
                    self.cur_folder = self.cur_folder.add_folder(top.data)
                else:
                    self.cur_folder = self.folder_root
            else:
                # Tag is h1, must be the root folder
                if self.folder_root is None:
                    # No root was supplied; create one instead of failing.
                    self.folder_root = BookmarkFolder(top.data, None)
                else:
                    self.folder_root.reset(top.data, None)
                self.cur_folder = self.folder_root
            if self.cur_folder is None:
                # <h3> seen with neither an open folder nor a root.
                return
            for attr, value in top.attrs.items():
                self.cur_folder.attrs[attr] = value
            if debug:
                print("%sPUSH Folder %s" % (indent*(len(self.cur_folder.path())-1),self.cur_folder.name))
        elif top.tag == 'dl':
            # Closing current folder, effectively pop it off the folder stack,
            # the currently open folder is replaced by this folders parent.
            if self.cur_folder is not None:
                if debug:
                    print("%sPOP Folder %s" % (indent*(len(self.cur_folder.path())-1),self.cur_folder.name))
                self.cur_folder = self.cur_folder.parent

    def handle_endtag(self, tag):
        """Close the innermost open element named tag.

        End tags of 'simple_tag' elements and end tags without a matching
        open element are ignored, since nothing was pushed for them.
        Unclosed elements above the matching one are closed first so that
        the match itself is always the element that gets popped.
        """
        tag_info = tag_info_dict.get(tag, None)
        if tag_info and tag_info.get('simple_tag', False):
            return
        self.implicit_close('end', tag)
        if self.find_tag_on_stack(tag) is None:
            return
        while self.stack:
            closing = self.stack[-1].tag
            self._handle_endtag(closing)
            if closing == tag:
                break

    def handle_data(self, data):
        """Append the stripped text to the element on top of the stack."""
        top = self.stack[-1]
        top.data = top.data + data.strip()
377 378 # -----------------------
def visit(entry, type, path, data=None):
    """Traversal callback that prints one entry and its attributes.

    entry -- the folder or bookmark being visited
    type  -- TYPE_FOLDER, TYPE_BOOKMARK or TYPE_FOLDER_END
    path  -- list of the folders enclosing entry; its length sets the indent
    data  -- opaque value, printed next to folder names

    Raises ValueError for any other type.  Names and attribute values are
    cut to 80 characters.
    """
    max_len = 80
    level = len(path)-1

    if type == TYPE_FOLDER_END:
        # The folder was already printed on TYPE_FOLDER; printing its
        # attributes again here would repeat them after its children.
        return

    if type == TYPE_FOLDER:
        print("%sFolder: %s(%s) path = [%s]" % (indent*level,
                                                 entry.name[0:max_len],
                                                 data, entry.path_as_string()))
    elif type == TYPE_BOOKMARK:
        print("%sBookmark: %s" % (indent*(level), entry.name[0:max_len]))
    else:
        raise ValueError("unknown entry type: %r" % (type,))

    for attr, value in entry.attrs.items():
        # HTMLParser reports attributes written without a value as None,
        # which cannot be sliced.
        if value is not None:
            value = value[0:max_len]
        print("%sAttr: %s = %s" % (indent*(level+1), attr, value))
# -----------------------

if __name__ == "__main__":
    # Manual test: parse bookmarks.html (relative to the current working
    # directory) into a tree rooted at bm_root.
    bm_root = BookmarkFolder('bm', None)
    bm_file = BookmarkHTMLParser()
    bm_file.set_root(bm_root)
    html_file = open('bookmarks.html')
    try:
        bm_file.feed(html_file.read())
    finally:
        # Close the file even when reading or parsing fails.
        html_file.close()
    bm_file.close()