Source Code for Module gluon.cache

#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
| This file is part of the web2py Web Framework
| Copyrighted by Massimo Di Pierro <mdipierro@cs.depaul.edu>
| License: LGPLv3 (http://www.gnu.org/licenses/lgpl.html)

Basic caching classes and methods
---------------------------------

- Cache - the generic caching object interfacing with the others
- CacheInRam - provides caching in RAM
- CacheOnDisk - provides caching on disk

Memcache is also available via a different module (see gluon.contrib.memcache)

When web2py is running on Google App Engine,
caching will be provided by the GAE memcache
(see gluon.contrib.gae_memcache)
"""
import time
import portalocker
import shelve
import thread
import os
import logging
import re
import hashlib
import datetime
try:
    from gluon import settings
    have_settings = True
except ImportError:
    have_settings = False

logger = logging.getLogger("web2py.cache")

__all__ = ['Cache', 'lazy_cache']


DEFAULT_TIME_EXPIRE = 300
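
# Illustrative sketch (not part of the original module): how the fetch-or-
# compute call protocol defined by the cache classes below is typically used.
# Inside a web2py controller a `cache` object (an instance of the Cache class
# defined further down in this file) is available; the key and values here
# are placeholders and this function is never executed at import time.
def _example_call_protocol(cache):
    # returns the cached value if it is younger than time_expire seconds,
    # otherwise calls the lambda and stores (and returns) its result
    value = cache.ram('mykey', lambda: 1 + 1, time_expire=DEFAULT_TIME_EXPIRE)
    # passing f=None deletes the entry stored under 'mykey'
    cache.ram('mykey', None)
    return value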


class CacheAbstract(object):
    """
    Abstract class for cache implementations.
    Its main purpose is to provide reference API documentation.

    Use CacheInRam or CacheOnDisk instead, which are derived from this class.

    Note:
        Michele says: there are version signatures inside gdbm files that are
        checked by the python gdbm adapter, and the detection code on the
        python side often lags behind.
        Whenever such a gdbm store is probed by the python adapter,
        the probe fails, because the gdbm file version is newer.
        Using gdbm directly from C would work, because there is backward
        compatibility, but not from python!
        The .shelve file is then discarded and a new one created (with the new
        signature) and it works until it is probed again...
        The possible consequences are memory leaks and broken sessions.
    """

    cache_stats_name = 'web2py_cache_statistics'

    def __init__(self, request=None):
        """Initializes the object

        Args:
            request: the global request object
        """
        raise NotImplementedError

    def __call__(self, key, f,
                 time_expire=DEFAULT_TIME_EXPIRE):
        """
        Tries to retrieve the value corresponding to `key` from the cache if the
        object exists and if it did not expire, else it calls the function `f`
        and stores the output in the cache corresponding to `key`. In either
        case it returns the output of the function.

        Args:
            key(str): the key of the object to be stored or retrieved
            f(function): the function whose output is to be cached.

                If `f` is `None` the cache entry is cleared.
            time_expire(int): expiration of the cache in seconds.

                It is used to compare the current time with the time
                when the requested object was last saved in cache. It does not
                affect future requests. Setting `time_expire` to 0 or to a
                negative value forces the cache to refresh.
        """
        raise NotImplementedError

    def clear(self, regex=None):
        """
        Clears the cache of all keys that match the provided regular expression.
        If no regular expression is provided, it clears all entries in cache.

        Args:
            regex: if provided, only keys matching the regex will be cleared,
                otherwise all keys are cleared.
        """
        raise NotImplementedError

    def increment(self, key, value=1):
        """
        Increments the cached value for the given key by the amount in value

        Args:
            key(str): key for the cached object to be incremented
            value(int): amount of the increment (defaults to 1, can be negative)
        """
        raise NotImplementedError

    def _clear(self, storage, regex):
        """
        Auxiliary function called by `clear` to search and clear cache entries
        """
        r = re.compile(regex)
        for (key, value) in storage.items():
            if r.match(str(key)):
                del storage[key]

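
# Illustrative sketch (not part of the original module): how clear() with a
# regular expression (delegating to _clear above) removes only the matching
# keys.  It uses CacheInRam, defined just below, and is never executed at
# import time; the keys and values are placeholders.
def _example_clear_by_regex():
    rcache = CacheInRam()                       # no request: uses the '' app slot
    rcache('user_1', lambda: 'a', time_expire=300)
    rcache('user_2', lambda: 'b', time_expire=300)
    rcache('page_home', lambda: 'c', time_expire=300)
    rcache.clear(regex='user_.*')               # removes user_1 and user_2 only
    # page_home is still cached, so the new lambda is not called
    assert rcache('page_home', lambda: 'changed', time_expire=300) == 'c'
    return rcache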


class CacheInRam(CacheAbstract):
    """
    RAM based caching

    This is implemented as a global (per process, shared by all threads)
    dictionary.
    A mutex-lock mechanism avoids conflicts.
    """

    locker = thread.allocate_lock()
    meta_storage = {}

    def __init__(self, request=None):
        self.initialized = False
        self.request = request
        self.storage = {}

    def initialize(self):
        if self.initialized:
            return
        else:
            self.initialized = True
        self.locker.acquire()
        request = self.request
        if request:
            app = request.application
        else:
            app = ''
        if not app in self.meta_storage:
            self.storage = self.meta_storage[app] = {
                CacheAbstract.cache_stats_name: {'hit_total': 0, 'misses': 0}}
        else:
            self.storage = self.meta_storage[app]
        self.locker.release()

    def clear(self, regex=None):
        self.initialize()
        self.locker.acquire()
        storage = self.storage
        if regex is None:
            storage.clear()
        else:
            self._clear(storage, regex)

        if not CacheAbstract.cache_stats_name in storage.keys():
            storage[CacheAbstract.cache_stats_name] = {
                'hit_total': 0, 'misses': 0}

        self.locker.release()

    def __call__(self, key, f,
                 time_expire=DEFAULT_TIME_EXPIRE,
                 destroyer=None):
        """
        Attention! cache.ram does not copy the cached object.
        It just stores a reference to it. It turns out that deep-copying the
        object has some problems:

        - it would break backward compatibility
        - it would be limiting, because people may want to cache live objects
        - it would have to be done on both storage and retrieval, which would
          make things slow.

        Anyway, you can deepcopy explicitly in the function generating the
        value to be cached.
        """
        self.initialize()

        dt = time_expire
        now = time.time()

        self.locker.acquire()
        item = self.storage.get(key, None)
        if item and f is None:
            del self.storage[key]
            if destroyer:
                destroyer(item[1])
        self.storage[CacheAbstract.cache_stats_name]['hit_total'] += 1
        self.locker.release()

        if f is None:
            return None
        if item and (dt is None or item[0] > now - dt):
            return item[1]
        elif item and (item[0] < now - dt) and destroyer:
            destroyer(item[1])
        value = f()

        self.locker.acquire()
        self.storage[key] = (now, value)
        self.storage[CacheAbstract.cache_stats_name]['misses'] += 1
        self.locker.release()
        return value

    def increment(self, key, value=1):
        self.initialize()
        self.locker.acquire()
        try:
            if key in self.storage:
                value = self.storage[key][1] + value
            self.storage[key] = (time.time(), value)
        except BaseException, e:
            self.locker.release()
            raise e
        self.locker.release()
        return value

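
# Illustrative sketch (not part of the original module): the docstring of
# CacheInRam.__call__ above warns that cache.ram stores a *reference* to the
# cached object, not a copy.  This sketch (never executed at import time)
# shows the consequence; the key and values are placeholders.
def _example_ram_reference_semantics():
    rcache = CacheInRam()
    cached = rcache('numbers', lambda: [1, 2, 3], time_expire=300)
    cached.append(4)                            # mutates the object held in the cache
    again = rcache('numbers', lambda: [1, 2, 3], time_expire=300)
    # `again` is the very same list object, now [1, 2, 3, 4]; deepcopy inside
    # the lambda (or before mutating) if an independent copy is needed
    assert again is cached
    return again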


class CacheOnDisk(CacheAbstract):
    """
    Disk based cache

    This is implemented as a shelve object and it is shared by multiple web2py
    processes (and threads) as long as they share the same filesystem.
    The file is locked when accessed.

    Disk cache provides persistence when web2py is started/stopped, but it is
    slower than `CacheInRam`.

    Values stored in the disk cache must be picklable.
    """

    def _close_shelve_and_unlock(self):
        try:
            if self.storage:
                self.storage.close()
        except ValueError:
            pass
        finally:
            self.storage = None
            if self.locker and self.locked:
                portalocker.unlock(self.locker)
                self.locker.close()
                self.locked = False

    def _open_shelve_and_lock(self):
        """Open and return a shelf object, obtaining an exclusive lock
        on self.locker first. The lock (and the shelf) is released by
        `_close_shelve_and_unlock`."""

        storage = None
        locker = None
        locked = False
        try:
            locker = open(self.locker_name, 'a')
            portalocker.lock(locker, portalocker.LOCK_EX)
            locked = True
            try:
                storage = shelve.open(self.shelve_name)
            except:
                logger.error('corrupted cache file %s, will try to rebuild it'
                             % self.shelve_name)
                storage = None
            if storage is None:
                if os.path.exists(self.shelve_name):
                    os.unlink(self.shelve_name)
                storage = shelve.open(self.shelve_name)
            if not CacheAbstract.cache_stats_name in storage.keys():
                storage[CacheAbstract.cache_stats_name] = {
                    'hit_total': 0, 'misses': 0}
            storage.sync()
        except Exception, e:
            if storage:
                storage.close()
                storage = None
            if locked:
                portalocker.unlock(locker)
                locker.close()
                locked = False
            raise RuntimeError(
                'unable to create/re-create cache file %s' % self.shelve_name)
        self.locker = locker
        self.locked = locked
        self.storage = storage
        return storage

    def __init__(self, request=None, folder=None):
        self.initialized = False
        self.request = request
        self.folder = folder
        self.storage = {}

    def initialize(self):
        if self.initialized:
            return
        else:
            self.initialized = True
        folder = self.folder
        request = self.request

        # check whether the cache folder exists; if not, create it
        folder = folder or os.path.join(request.folder, 'cache')

        if not os.path.exists(folder):
            os.mkdir(folder)

        ### we need this because of a possible bug in shelve that may
        ### or may not lock
        self.locker_name = os.path.join(folder, 'cache.lock')
        self.shelve_name = os.path.join(folder, 'cache.shelve')

    def clear(self, regex=None):
        self.initialize()
        storage = self._open_shelve_and_lock()
        try:
            if regex is None:
                storage.clear()
            else:
                self._clear(storage, regex)
            storage.sync()
        finally:
            self._close_shelve_and_unlock()

    def __call__(self, key, f,
                 time_expire=DEFAULT_TIME_EXPIRE):
        self.initialize()
        dt = time_expire
        storage = self._open_shelve_and_lock()
        try:
            item = storage.get(key, None)
            storage[CacheAbstract.cache_stats_name]['hit_total'] += 1
            if item and f is None:
                del storage[key]
                storage.sync()
            now = time.time()
            if f is None:
                value = None
            elif item and (dt is None or item[0] > now - dt):
                value = item[1]
            else:
                value = f()
                storage[key] = (now, value)
                storage[CacheAbstract.cache_stats_name]['misses'] += 1
                storage.sync()
        finally:
            self._close_shelve_and_unlock()

        return value

    def increment(self, key, value=1):
        self.initialize()
        storage = self._open_shelve_and_lock()
        try:
            if key in storage:
                value = storage[key][1] + value
            storage[key] = (time.time(), value)
            storage.sync()
        finally:
            self._close_shelve_and_unlock()
        return value

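
# Illustrative sketch (not part of the original module): using CacheOnDisk
# outside of a request by passing an explicit folder.  It creates real files
# (cache.lock and cache.shelve) in a temporary directory and requires the
# cached values to be picklable; the keys and values are placeholders and the
# function is never executed at import time.
def _example_disk_cache():
    import tempfile
    folder = tempfile.mkdtemp()                 # any existing, writable directory
    dcache = CacheOnDisk(folder=folder)
    value = dcache('answer', lambda: 42, time_expire=300)   # stored in cache.shelve
    counter = dcache.increment('hits')          # creates the key with value 1
    return value, counter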

class CacheAction(object):
    def __init__(self, func, key, time_expire, cache, cache_model):
        self.__name__ = func.__name__
        self.__doc__ = func.__doc__
        self.func = func
        self.key = key
        self.time_expire = time_expire
        self.cache = cache
        self.cache_model = cache_model

    def __call__(self, *a, **b):
        if not self.key:
            key2 = self.__name__ + ':' + repr(a) + ':' + repr(b)
        else:
            key2 = self.key.replace('%(name)s', self.__name__)\
                .replace('%(args)s', str(a)).replace('%(vars)s', str(b))
        cache_model = self.cache_model
        if not cache_model or isinstance(cache_model, str):
            cache_model = getattr(self.cache, cache_model or 'ram')
        return cache_model(key2,
                           lambda a=a, b=b: self.func(*a, **b),
                           self.time_expire)

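
# Illustrative sketch (not part of the original module): the key template
# substitution performed by CacheAction.__call__ above.  Cache.autokey
# (defined on the Cache class below) is the template web2py uses; 'myapp',
# 'index' and the sample arguments are placeholders.
def _example_cacheaction_key():
    template = 'myapp' + Cache.autokey          # ':%(name)s:%(args)s:%(vars)s'
    a, b = (1,), {'x': 2}
    key2 = template.replace('%(name)s', 'index')\
        .replace('%(args)s', str(a)).replace('%(vars)s', str(b))
    return key2                                 # "myapp:index:(1,):{'x': 2}"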


class Cache(object):
    """
    Sets up generic caching, creating an instance of both CacheInRam and
    CacheOnDisk.
    In the case of GAE it will make use of gluon.contrib.gae_memcache.

    - self.ram is an instance of CacheInRam
    - self.disk is an instance of CacheOnDisk
    """

    autokey = ':%(name)s:%(args)s:%(vars)s'

    def __init__(self, request):
        """
        Args:
            request: the global request object
        """
        # GAE will have a special caching
        if have_settings and settings.global_settings.web2py_runtime_gae:
            from gluon.contrib.gae_memcache import MemcacheClient
            self.ram = self.disk = MemcacheClient(request)
        else:
            # Otherwise use ram (and try also disk)
            self.ram = CacheInRam(request)
            try:
                self.disk = CacheOnDisk(request)
            except IOError:
                logger.warning('no cache.disk (IOError)')
            except AttributeError:
                # normally not expected anymore, as GAE has already
                # been accounted for
                logger.warning('no cache.disk (AttributeError)')

    def action(self, time_expire=DEFAULT_TIME_EXPIRE, cache_model=None,
               prefix=None, session=False, vars=True, lang=True,
               user_agent=False, public=True, valid_statuses=None,
               quick=None):
        """Better fit for caching an action

        Warning:
            Experimental!

        Currently only HTTP 1.1 compliant.
        Reference: http://code.google.com/p/doctype-mirror/wiki/ArticleHttpCaching

        Args:
            time_expire(int): same as @cache
            cache_model(str): same as @cache
            prefix(str): add a prefix to the calculated key
            session(bool): adds response.session_id to the key
            vars(bool): adds request.env.query_string
            lang(bool): adds T.accepted_language
            user_agent(bool or dict): if True, adds is_mobile and is_tablet to the
                key. Pass a dict to use all the needed values (uses str(.items()))
                (e.g. user_agent=request.user_agent()). Used only if session is
                not True
            public(bool): if False forces the Cache-Control to be 'private'
            valid_statuses: by default only status codes starting with 1, 2 or 3
                will be cached. Pass an explicit list of statuses for which the
                cache should be turned on
            quick: Session, Vars, Lang, User-agent, Public:
                fast overrides with initials, e.g. 'SVLP' or 'VLP'
        """
        from gluon import current
        from gluon.http import HTTP

        def wrap(func):
            def wrapped_f():
                if current.request.env.request_method != 'GET':
                    return func()
                if time_expire:
                    cache_control = 'max-age=%(time_expire)s, s-maxage=%(time_expire)s' % dict(time_expire=time_expire)
                    if quick:
                        session_ = True if 'S' in quick else False
                        vars_ = True if 'V' in quick else False
                        lang_ = True if 'L' in quick else False
                        user_agent_ = True if 'U' in quick else False
                        public_ = True if 'P' in quick else False
                    else:
                        session_, vars_, lang_, user_agent_, public_ = session, vars, lang, user_agent, public
                    if not session_ and public_:
                        cache_control += ', public'
                        expires = (current.request.utcnow + datetime.timedelta(seconds=time_expire)).strftime('%a, %d %b %Y %H:%M:%S GMT')
                    else:
                        cache_control += ', private'
                        expires = 'Fri, 01 Jan 1990 00:00:00 GMT'
                if cache_model:
                    #figure out the correct cache key
                    cache_key = [current.request.env.path_info, current.response.view]
                    if session_:
                        cache_key.append(current.response.session_id)
                    elif user_agent_:
                        if user_agent_ is True:
                            cache_key.append("%(is_mobile)s_%(is_tablet)s" % current.request.user_agent())
                        else:
                            cache_key.append(str(user_agent_.items()))
                    if vars_:
                        cache_key.append(current.request.env.query_string)
                    if lang_:
                        cache_key.append(current.T.accepted_language)
                    cache_key = hashlib.md5('__'.join(cache_key)).hexdigest()
                    if prefix:
                        cache_key = prefix + cache_key
                    try:
                        #action returns something
                        rtn = cache_model(cache_key, lambda: func(), time_expire=time_expire)
                        http, status = None, current.response.status
                    except HTTP, e:
                        #action raises HTTP (can still be valid)
                        rtn = cache_model(cache_key, lambda: e.body, time_expire=time_expire)
                        http, status = HTTP(e.status, rtn, **e.headers), e.status
                    else:
                        #action raised a generic exception
                        http = None
                else:
                    #no server-cache side involved
                    try:
                        #action returns something
                        rtn = func()
                        http, status = None, current.response.status
                    except HTTP, e:
                        #action raises HTTP (can still be valid)
                        status = e.status
                        http = HTTP(e.status, e.body, **e.headers)
                    else:
                        #action raised a generic exception
                        http = None
                send_headers = False
                if http and isinstance(valid_statuses, list):
                    if status in valid_statuses:
                        send_headers = True
                elif valid_statuses is None:
                    if str(status)[0] in '123':
                        send_headers = True
                if send_headers:
                    headers = {
                        'Pragma': None,
                        'Expires': expires,
                        'Cache-Control': cache_control
                    }
                    current.response.headers.update(headers)
                if cache_model and not send_headers:
                    #we already cached the value, but the status is not valid
                    #so we need to delete the cached value
                    cache_model(cache_key, None)
                if http:
                    if send_headers:
                        http.headers.update(current.response.headers)
                    raise http
                return rtn
            wrapped_f.__name__ = func.__name__
            wrapped_f.__doc__ = func.__doc__
            return wrapped_f
        return wrap
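
    # Illustrative usage (not part of the original module): a hedged sketch of
    # how cache.action is typically applied in a controller, assuming the usual
    # web2py globals (cache, request) are available there:
    #
    #     @cache.action(time_expire=300, cache_model=cache.ram, quick='SVL')
    #     def index():
    #         return dict(now=request.now)
    #
    # With quick='SVL' the session id, the query string and the accepted
    # language are folded into the cache key, and Expires / Cache-Control
    # headers are sent for responses whose status starts with 1, 2 or 3.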

    def __call__(self,
                 key=None,
                 time_expire=DEFAULT_TIME_EXPIRE,
                 cache_model=None):
        """
        Decorator function that can be used to cache any function/method.

        Args:
            key(str) : the key of the object to be stored or retrieved
            time_expire(int) : expiration of the cache in seconds
                `time_expire` is used to compare the current time with the time
                when the requested object was last saved in cache.
                It does not affect future requests.
                Setting `time_expire` to 0 or to a negative value forces the
                cache to refresh.
            cache_model(str): can be "ram", "disk" or other (like "memcache").
                Defaults to "ram"

        When the function `f` is called, web2py tries to retrieve
        the value corresponding to `key` from the cache if the
        object exists and if it did not expire, else it calls the function `f`
        and stores the output in the cache corresponding to `key`. In either
        case the output of the function is returned.

        Example: ::

            @cache('key', 5000, cache.ram)
            def f():
                return time.ctime()

        Note:
            If the function `f` is an action, we suggest using
            `@cache.action` instead
        """

        def tmp(func, cache=self, cache_model=cache_model):
            return CacheAction(func, key, time_expire, self, cache_model)
        return tmp

    @staticmethod
    def with_prefix(cache_model, prefix):
        """
        Allows replacing `cache.ram` with `cache.with_prefix(cache.ram, 'prefix')`:
        it will add the prefix to all the cache keys used.
        """
        return lambda key, f, time_expire=DEFAULT_TIME_EXPIRE, prefix=prefix:\
            cache_model(prefix + key, f, time_expire)

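
# Illustrative sketch (not part of the original module): the @cache(...)
# decorator implemented by Cache.__call__ and the with_prefix helper above.
# The `cache` argument stands for the Cache instance that web2py creates for
# each application; the keys and prefix are placeholders and nothing here
# runs at import time.
def _example_decorator_and_prefix(cache):
    @cache('mykey', 60, cache.ram)              # cache the result in RAM for 60s
    def expensive():
        return time.ctime()

    # with_prefix returns a callable with the same (key, f, time_expire)
    # signature that transparently prepends 'myapp:' to every key
    prefixed_ram = Cache.with_prefix(cache.ram, 'myapp:')
    value = prefixed_ram('mykey', lambda: 'something', 60)   # stored as 'myapp:mykey'
    return expensive(), value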


def lazy_cache(key=None, time_expire=None, cache_model='ram'):
    """
    Can be used to cache any function including ones in modules,
    as long as the cached function is only called within a web2py request

    If a key is not provided, one is generated from the function name
    `time_expire` defaults to None (no cache expiration)

    If cache_model is "ram" then the model is current.cache.ram, etc.
    """
    def decorator(f, key=key, time_expire=time_expire, cache_model=cache_model):
        key = key or repr(f)

        def g(*c, **d):
            from gluon import current
            return current.cache(key, time_expire, cache_model)(f)(*c, **d)
        g.__name__ = f.__name__
        return g
    return decorator
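
# Illustrative sketch (not part of the original module): lazy_cache applied to
# a plain module-level function.  The decorated function only works while a
# web2py request is active, because it looks up current.cache at call time;
# the key and the function are placeholders.
def _example_lazy_cache():
    @lazy_cache('demo_key', time_expire=60, cache_model='ram')
    def double(n):
        return n * 2
    # double(21) would return 42 and store it in current.cache.ram
    # under 'demo_key' for 60 seconds
    return double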