Package sabayon :: Module cache
[hide private]
[frames] | no frames]

Source Code for Module sabayon.cache

  1  # 
  2  # Copyright (C) 2005 Red Hat, Inc. 
  3  # 
  4  # This program is free software; you can redistribute it and/or modify 
  5  # it under the terms of the GNU General Public License as published by 
  6  # the Free Software Foundation; either version 2 of the License, or 
  7  # (at your option) any later version. 
  8  # 
  9  # This program is distributed in the hope that it will be useful, 
 10  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 11  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 12  # GNU General Public License for more details. 
 13  # 
 14  # You should have received a copy of the GNU General Public License 
 15  # along with this program; if not, write to the Free Software 
 16  # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
 17  # 
 18  import urllib2 
 19  import urlparse 
 20  import util 
 21  import os 
 22  import stat 
 23  import string 
 24  import libxml2 
 25  import debuglog 
 26   
def dprint(fmt, *args):
    """Log a debug message in the cache debug-log domain.

    fmt  -- message text, optionally containing %-style placeholders
    args -- values substituted into fmt

    The %-substitution is only applied when at least one argument is
    given, so a caller passing an already formatted string (for example
    one embedding a path that contains a '%' character) cannot make the
    logging call itself raise.
    """
    if args:
        message = fmt % args
    else:
        message = fmt
    debuglog.debug_log (False, debuglog.DEBUG_LOG_DOMAIN_CACHE, message)
29
def get_home_dir():
    """Return the home directory as reported by util.get_home_dir()."""
    home = util.get_home_dir()
    return home
32
class cacheRepository:
    """This is a remote resource cache, based on Python native urllib2
       and implementing a local cache for remote resources. It will
       provide access to copies of the resources if unavailable and
       try to transfer only files remotely modified from the cache.

       The cache lives in a private directory (mode 0700) holding one
       file per cached URL plus a "catalog.xml" document recording, for
       each URL, the Last-Modified timestamp sent by the server."""

    def __init__(self, directory = None):
        """Create a cache using `directory`, or the default location
           ($HOME/.sabayon/profile_cache) when it is None or unusable.
           Nothing is touched on disk until a remote resource is
           requested."""
        self.directory = None
        self.catalog = None
        self.root = None
        # delay the directory check/creation until needed
        self.orig_directory = directory

    def __create_directory(self, directory, success_fmt):
        """Create `directory` and return its os.stat() result, or None
           on failure. `success_fmt` is the debug message logged when the
           directory was created."""
        try:
            os.mkdir(directory)
            dprint(success_fmt, directory)
            return os.stat(directory)
        except Exception:
            dprint("Failed to create directory %s", directory)
            return None

    def __ensure_directory(self, directory, remove_old):
        """Make sure `directory` exists, is a directory and is owned by
           the current user. A missing directory is created. An existing
           but unsuitable entry is replaced only when `remove_old` is
           true. Returns the os.stat() result for the directory, or None
           if it could not be provided."""
        try:
            info = os.stat(directory)
        except Exception:
            dprint("Failed to check directory %s", directory)
            return self.__create_directory(directory, "Created directory %s")

        is_dir = stat.S_ISDIR(info.st_mode)
        if is_dir and info.st_uid == os.getuid():
            return info

        dprint("File %s is not a directory", directory)
        if not remove_old:
            return None
        try:
            import shutil
            if os.path.islink(directory) or not is_dir:
                # rmtree() silently does nothing on a plain file or a
                # symlink, which would make the mkdir below always fail
                os.unlink(directory)
            else:
                shutil.rmtree(directory, True)
        except Exception:
            dprint("Failed to create directory %s", directory)
            return None
        return self.__create_directory(directory, "Recreated directory %s")

    def __release_catalog(self):
        """Free the in-memory catalog document, if any, and forget it."""
        if self.catalog is not None:
            # libxml2 documents are not garbage collected by Python
            self.catalog.freeDoc ()
        self.catalog = None
        self.root = None

    def __remove_catalog_file(self):
        """Best-effort removal of the on-disk catalog."""
        try:
            os.unlink(self.directory + "/catalog.xml")
        except Exception:
            # the file may not exist, or the cache may be deactivated
            # (self.directory is None); neither is an error here
            pass

    def __check_directory(self):
        """Select and validate the cache directory, then (re)load the
           catalog from it. On failure self.directory is set to None,
           which means the cache is deactivated."""
        directory = self.orig_directory
        # info must be defined even if no candidate directory works out
        info = None
        if directory is not None:
            info = self.__ensure_directory (directory, False)
            if info is None:
                directory = None

        if directory is None:
            parent = get_home_dir() + "/.sabayon"
            if self.__ensure_directory (parent, True):
                directory = parent + "/profile_cache"
                info = self.__ensure_directory (directory, True)

        if info is None:
            dprint("Running with cache deactivated")
            self.directory = None
            return

        self.directory = directory
        private_mode = stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR
        if stat.S_IMODE(info.st_mode) != private_mode:
            dprint("Wrong mode for %s", directory)
            try:
                os.chmod(directory, private_mode)
            except Exception:
                dprint("Failed to chmod %s, ignored", directory)
                self.directory = None
                return

        # drop the previously loaded document before parsing a new one,
        # otherwise every call would leak a libxml2 document
        self.__release_catalog()
        catalogfile = self.directory + "/catalog.xml"
        try:
            self.catalog = libxml2.readFile(catalogfile, None,
                                            libxml2.XML_PARSE_NOBLANKS)
        except Exception:
            dprint("Failed to load catalog from %s", catalogfile)
            self.catalog = None

        if self.catalog is not None:
            root = self.catalog.getRootElement()
            if not root or root.name != "catalog":
                dprint("Discarding corrupted catalog")
                self.__release_catalog()
            else:
                self.root = root

        # remove empty catalogs
        if self.catalog is None or self.root is None or \
           self.root.children is None:
            self.__remove_catalog_file()

    def __URL_mapping(self, URL):
        """Function to convert an URL to a local name in the cache"""
        return URL.replace('//', "_").replace('/', "_")

    def __save_catalog(self):
        """Save the on disk catalog in XML format"""
        # don't save an empty catalog, and remove it if empty
        if self.catalog is None or self.root is None or \
           self.root.children is None:
            self.__remove_catalog_file()
            return
        if self.directory is not None:
            f = open(self.directory + "/catalog.xml", "w")
            try:
                f.write(self.catalog.serialize(format = 1))
            finally:
                f.close()

    def __find_entry(self, URL):
        """Return the catalog <entry> element recorded for `URL`, or
           None. The children are walked directly instead of building an
           XPath expression from the URL, which would break as soon as
           the URL contains a quote character."""
        if self.root is None:
            return None
        node = self.root.children
        while node is not None:
            if node.type == "element" and node.name == "entry" and \
               node.prop("URL") == URL:
                return node
            node = node.next
        return None

    def __update_catalog(self, URL, timestamp = None):
        """Update the catalog of resources in the cache with an updated
           entry. A None timestamp is recorded as an empty string. The
           catalog is written to disk only if something changed."""
        if URL is None:
            return
        modified = False

        # create the catalog if needed
        if self.catalog is None:
            self.catalog = libxml2.newDoc("1.0")
            self.root = self.catalog.newChild (None, "catalog", None)
            modified = True
        if self.root is None:
            return

        child = self.__find_entry(URL)
        if child is None:
            child = self.root.newChild(None, "entry", None)
            child.setProp("URL", URL)
            if timestamp is None:
                timestamp = ""
            child.setProp("timestamp", timestamp)
            modified = True
        elif timestamp is None:
            child.setProp("timestamp", "")
            modified = True
        elif timestamp != child.prop("timestamp"):
            child.setProp("timestamp", timestamp)
            modified = True

        if modified:
            self.__save_catalog()

    def __catalog_lookup(self, URL):
        """Lookup an entry in the catalog. Returns the timestamp recorded
           for `URL` (an empty string if the resource is cached without a
           timestamp), or None if the URL is not in the catalog or its
           cache file no longer exists. In the latter case the stale
           entry is dropped from the in-memory catalog."""
        child = self.__find_entry(URL)
        if child is None:
            return None
        filename = self.directory + "/" + self.__URL_mapping(URL)
        if not os.path.exists(filename):
            dprint("Local cache file for %s disapeared", URL)
            child.unlinkNode()
            child.freeNode()
            return None
        return child.prop("timestamp")

    def get_resource(self, URL):
        """Get a resource from the cache. It may fetch it from the network
           or use a local copy.
           If passed a filename it will accept it if absolute.
           The return value is an absolute path to a local file, or None
           if the resource cannot be provided (empty or relative name,
           cache deactivated, or download failed with no cached copy)."""
        if not URL:
            return None

        # a value made only of a path component is a local file name
        local_path = None
        try:
            if urlparse.urlparse(URL)[2] == URL:
                local_path = URL
        except Exception:
            local_path = URL
        if local_path is not None:
            if not local_path.startswith('/'):
                return None
            return local_path

        self.__check_directory()
        if self.directory is None:
            # cache deactivated: there is no place to store the download
            return None

        filename = self.directory + "/" + self.__URL_mapping(URL)
        timestamp = self.__catalog_lookup(URL)
        try:
            request = urllib2.Request(URL)
            if timestamp:
                request.add_header('If-Modified-Since', timestamp)
        except Exception:
            dprint("Failed to create request for %s", URL)
            return None

        last_modified = None
        try:
            opener = urllib2.build_opener()
            # TODO handle time outs there ....
            datastream = opener.open(request)
            try:
                try:
                    last_modified = datastream.headers.dict['last-modified']
                except (KeyError, AttributeError):
                    last_modified = None
                data = datastream.read()
            finally:
                datastream.close()
        except Exception:
            # includes the HTTP error raised for "304 Not Modified"
            dprint("Resource not available or older using cache")
            if os.path.exists(filename):
                return filename
            dprint("Failed to find cache file %s", filename)
            return None

        try:
            # the payload is raw bytes, so write it untranslated
            fd = open(filename, "wb")
            try:
                fd.write(data)
            finally:
                fd.close()
            self.__update_catalog(URL, last_modified)
        except Exception:
            dprint("Failed to write cache file %s", filename)
            return None
        return filename

# process-wide cache instance, created on first use by get_default_cache()
default_cache = None


def get_default_cache():
    """Return the shared cacheRepository, creating it on the first call.

    Creating the instance also installs libxml2_entity_loader as the
    libxml2 entity loader.
    """
    global default_cache

    if default_cache is not None:
        return default_cache

    default_cache = cacheRepository()
    # now we can activate the entity loader
    libxml2.setEntityLoader(libxml2_entity_loader)
    return default_cache


# redefine libxml2 entity loader to use the default cache
def libxml2_entity_loader(URL, ID, ctxt):
    """Entity loader callback resolving `URL` through the default cache.

    Returns an open file object on the local copy of the resource, or
    None when the resource cannot be obtained or opened.
    """
    dprint("Cache entity loader called for %s '%s'", URL, ID)
    local_copy = get_default_cache().get_resource(URL)
    try:
        stream = open(local_copy)
        dprint("Cache entity loader resolved to %s", local_copy)
    except:
        stream = None
    return stream


# don't report errors from libxml2 parsing
def libxml2_no_error_callback(ctx, str):
    """Error handler that ignores both of its arguments and does nothing."""
    return None

# install the silent handler as soon as the module is imported
libxml2.registerErrorHandler(libxml2_no_error_callback, "")


def initialize():
    """Create the default cache if needed and install the entity loader.

    The entity loader is set unconditionally, even if the default cache
    already existed.
    """
    get_default_cache()
    libxml2.setEntityLoader(libxml2_entity_loader)
301 302
def run_unit_tests ():
    """Self test for the cache, run when the module is executed directly.

    Serves a single file from a throw-away HTTP server on port 8000 and
    checks that absolute local paths are returned unchanged, relative
    names are rejected, and an HTTP resource can be fetched and then
    read back from the cache. The server answers one request only, so
    the later HTTP accesses have to be satisfied by the cache, including
    from a brand new cacheRepository instance on the same directory.
    """
    import BaseHTTPServer
    import SimpleHTTPServer
    import shutil
    import os
    import thread
    import time

    class test_http_handler(SimpleHTTPServer.SimpleHTTPRequestHandler):
        # keep the test output free of per-request log lines
        def log_message(self, format, *args):
            pass

    def run_http_server(www):
        os.chdir(www)
        server_address = ('', 8000)
        httpd = BaseHTTPServer.HTTPServer(server_address, test_http_handler)
        try:
            dprint("starting HTTP on %s", www)
            # serve exactly one request, then go away
            httpd.handle_request()
            dprint("stopping HTTP server")
        finally:
            # release the listening socket
            httpd.server_close()

    def read_file(path):
        # read a whole file and close it deterministically
        stream = open(path)
        try:
            return stream.read()
        finally:
            stream.close()

    www = "/tmp/sabayon_http_test"
    shutil.rmtree(www, True)
    os.mkdir(www)
    served = open(www + "/foo", "w")
    try:
        served.write("content")
    finally:
        served.close()
    thread.start_new_thread(run_http_server, (www,))

    cache_dir = "/tmp/cache_test"
    shutil.rmtree(cache_dir, True)
    cache = cacheRepository(cache_dir)

    f = cache.get_resource(www + "/foo")
    assert f != None
    assert read_file(f) == "content"
    dprint("absolute local path okay")

    f = cache.get_resource("foo")
    assert f == None
    dprint("relative path okay")

    # give time for the HTTP server to start
    time.sleep(0.5)

    f = cache.get_resource("http://localhost:8000/foo")
    assert f != None
    assert read_file(f) == "content"
    dprint("first HTTP access okay")

    f = cache.get_resource("http://localhost:8000/foo")
    assert f != None
    assert read_file(f) == "content"
    dprint("second cached HTTP access okay")

    # shutdown the cache, restart a new instance and try to get the resource
    del cache
    cache = cacheRepository(cache_dir)

    f = cache.get_resource("http://localhost:8000/foo")
    assert f != None
    assert read_file(f) == "content"
    dprint("New cache cached HTTP access okay")

    shutil.rmtree(www, True)
    shutil.rmtree(cache_dir, True)


if __name__ == "__main__":
    run_unit_tests()