Package gluon :: Module rewrite
[hide private]
[frames] | no frames]

Source Code for Module gluon.rewrite

   1  #!/bin/env python 
   2  # -*- coding: utf-8 -*- 
   3   
   4  """ 
   5  | This file is part of the web2py Web Framework 
   6  | Copyrighted by Massimo Di Pierro <mdipierro@cs.depaul.edu> 
   7  | License: LGPLv3 (http://www.gnu.org/licenses/lgpl.html) 
   8   
   9  gluon.rewrite parses incoming URLs and formats outgoing URLs for gluon.html.URL. 
  10   
  11  In addition, it rewrites both incoming and outgoing URLs based on the (optional) user-supplied routes.py, 
  12  which also allows for rewriting of certain error messages. 
  13   
  14  routes.py supports two styles of URL rewriting, depending on whether 'routers' is defined. 
  15  Refer to router.example.py and routes.example.py for additional documentation. 
  16   
  17  """ 
  18   
  19  import os 
  20  import re 
  21  import logging 
  22  import traceback 
  23  import threading 
  24  import urllib 
  25  from gluon.storage import Storage, List 
  26  from gluon.http import HTTP 
  27  from gluon.fileutils import abspath, read_file 
  28  from gluon.settings import global_settings 
  29   
  30  isdir = os.path.isdir 
  31  isfile = os.path.isfile 
  32  exists = os.path.exists 
  33  pjoin = os.path.join 
  34   
  35  logger = logging.getLogger('web2py.rewrite') 
  36  THREAD_LOCAL = threading.local()  # thread-local storage for routing params 
  37   
  38  regex_at = re.compile(r'(?<!\\)\$[a-zA-Z]\w*') 
  39  regex_anything = re.compile(r'(?<!\\)\$anything') 
  40  regex_redirect = re.compile(r'(\d+)->(.*)') 
  41  regex_full_url = re.compile( 
  42      r'^(?P<scheme>http|https|HTTP|HTTPS)\://(?P<host>[^/]*)(?P<uri>.*)') 
  43  regex_version = re.compile(r'^(_[\d]+\.[\d]+\.[\d]+)$') 
  44   
  45  # pattern to find valid paths in url /application/controller/... 
  46  #   this could be: 
  47  #     for static pages: 
  48  #        /<b:application>/static/<x:file> 
  49  #     for dynamic pages: 
  50  #        /<a:application>[/<c:controller>[/<f:function>[.<e:ext>][/<s:args>]]] 
  51  #   application, controller, function and ext may only contain [a-zA-Z0-9_] 
  52  #   file and args may also contain '-', '=', '.' and '/' 
  53  #   apps in routes_apps_raw must parse raw_args into args 
  54   
  55  regex_url = re.compile('^/((?P<a>\w+)(/(?P<c>\w+)(/(?P<z>(?P<f>\w+)(\.(?P<e>[\w.]+))?(?P<s>.*)))?)?)?$') 
  56  regex_args = re.compile('[^\w/.@=-]') 
def _router_default():
    """Return a freshly built Storage holding the default BASE router settings."""
    defaults = {
        'default_application': 'init',
        'applications': 'ALL',
        'default_controller': 'default',
        'controllers': 'DEFAULT',
        'default_function': 'index',
        'functions': {},
        'default_language': None,
        'languages': None,
        'root_static': ['favicon.ico', 'robots.txt'],
        'map_static': None,
        'domains': None,
        'exclusive_domain': False,
        'map_hyphen': False,
        'acfe_match': r'\w+$',  # legal app/ctlr/fcn/ext
        #
        #  Implementation note:
        #  The file_match & args_match patterns use look-behind to avoid
        #  pathological backtracking from nested patterns.
        #
        'file_match': r'([-+=@$%\w]|(?<=[-+=@$%\w])[./])*$',  # legal static subpath
        'args_match': r'([\w@ -]|(?<=[\w@ -])[.=])*$',  # legal arg in args
    }
    # a new dict (and new list/dict values) is built on every call, so the
    # caller is free to mutate the result
    return Storage(defaults)
85
def _params_default(app=None):
    """Return a new Storage of default regex-rewrite parameters.

    ``app`` names the application the parameters belong to; when it is
    omitted the parameter set is named "BASE" and defaults to app "init".
    """
    ticket_page = (
        '<html><body><h1>Internal error</h1>Ticket issued: <a href="/admin/default/ticket/%(ticket)s" target="_blank">%(ticket)s</a></body><!-- this is junk text else IE does not display the page: '
        + ('x' * 512)
        + ' //--></html>')

    p = Storage()
    p.name = app or "BASE"
    p.default_application = app or "init"
    p.default_controller = "default"
    p.default_function = "index"
    # every routes_* table starts out as its own empty list
    for table in ('routes_app', 'routes_in', 'routes_out',
                  'routes_onerror', 'routes_apps_raw'):
        p[table] = []
    p.error_handler = None
    p.error_message = '<html><body><h1>%s</h1></body></html>'
    p.error_message_ticket = ticket_page
    p.routers = None
    p.logging = 'off'
    return p
# Module-level routing state; load() rebinds all of these.
# params_apps maps an application name to its own rewrite parameters.
params_apps = dict()
params = _params_default(app=None)  # regex rewrite parameters
THREAD_LOCAL.routes = params  # default to base regex rewrite parameters
# None until load() finds a 'routers' definition; url_in/url_out test this
# to choose between router-based and regex-based rewriting
routers = None
def log_rewrite(string):
    """Emit a rewrite trace message at the level selected by params.logging.

    'off' (or any false value) drops the message, 'print' writes it to
    stdout, 'info'/'warning'/'error'/'critical' use the matching logger
    method, and every other value (including 'debug') logs at debug level.
    """
    level = params.logging
    if not level or level == 'off':
        return
    if level == 'print':
        print(string)
        return
    if level in ('info', 'warning', 'error', 'critical'):
        getattr(logger, level)(string)
    else:
        logger.debug(string)
# Every key a router dict may contain; load_routers() raises SyntaxError
# for a router holding any key outside this set.
ROUTER_KEYS = set(
    ('default_application', 'applications',
     'default_controller', 'controllers',
     'default_function', 'functions',
     'default_language', 'languages',
     'domain', 'domains', 'root_static', 'path_prefix',
     'exclusive_domain', 'map_hyphen', 'map_static',
     'acfe_match', 'file_match', 'args_match'))

# Keys that are only legal in the BASE router; load() and load_routers()
# raise SyntaxError when an application router defines one of them.
ROUTER_BASE_KEYS = set(
    ('applications', 'default_application',
     'domains', 'path_prefix'))
#  The external interface to rewrite consists of:
#
#  load: load routing configuration file(s)
#  url_in: parse and rewrite incoming URL
#  url_out: assemble and rewrite outgoing URL
#
#  THREAD_LOCAL.routes.default_application
#  THREAD_LOCAL.routes.error_message
#  THREAD_LOCAL.routes.error_message_ticket
#  THREAD_LOCAL.routes.try_redirect_on_error
#  THREAD_LOCAL.routes.error_handler
#
#  filter_url: helper for doctest & unittest
#  filter_err: helper for doctest & unittest
#  regex_filter_out: doctest


def fixup_missing_path_info(environ):
    """Fill in PATH_INFO, QUERY_STRING, REQUEST_URI and HTTP_HOST in place.

    - If PATH_INFO is missing but REQUEST_URI is present, PATH_INFO and
      QUERY_STRING are derived from REQUEST_URI.
    - Otherwise, if REQUEST_URI is missing, it is rebuilt from PATH_INFO and
      QUERY_STRING.
    - If HTTP_HOST is missing it is set to 'SERVER_NAME:SERVER_PORT'.

    ``environ`` is a WSGI-style dict and is modified in place; nothing is
    returned.
    """
    eget = environ.get
    path_info = eget('PATH_INFO')
    request_uri = eget('REQUEST_URI')
    if not path_info and request_uri:
        # for fcgi, get path_info and query_string from request_uri.
        # Split on the first '?' only: a query string may itself contain
        # '?', and everything after the first one belongs to the query.
        items = request_uri.split('?', 1)
        path_info = environ['PATH_INFO'] = items[0]
        environ['QUERY_STRING'] = items[1] if len(items) > 1 else ''
    elif not request_uri:
        query_string = eget('QUERY_STRING')
        if query_string:
            # 'or ""' keeps a missing PATH_INFO from being rendered as the
            # literal text 'None'
            environ['REQUEST_URI'] = '%s?%s' % (path_info or '', query_string)
        else:
            environ['REQUEST_URI'] = path_info
    if not eget('HTTP_HOST'):
        environ['HTTP_HOST'] = \
            '%s:%s' % (eget('SERVER_NAME'), eget('SERVER_PORT'))
181
def url_in(request, environ):
    """Parse and rewrite an incoming URL.

    Delegates to map_url_in when routers are configured, otherwise to
    regex_url_in, and returns whatever the chosen handler returns.
    """
    handler = map_url_in if routers else regex_url_in
    return handler(request, environ)
188
def url_out(request, environ, application, controller, function,
            args, other, scheme, host, port, language=None):
    """Assemble and rewrite an outgoing URL.

    The /application/controller/function path is rewritten by map_url_out
    (routers configured) or regex_filter_out (otherwise), then prefixed with
    scheme://host[:port] when an absolute URL is requested.
    """
    if not routers:
        plain = '/%s/%s/%s%s' % (application, controller, function, other)
        url = regex_filter_out(plain, environ)
    else:
        acf = map_url_out(request, environ, application, controller,
                          function, args, other, scheme, host, port, language)
        url = '%s%s' % (acf, other)

    #
    # fill in scheme and host if absolute URL is requested
    # scheme can be a string, eg 'http', 'https', 'ws', 'wss'
    #
    wants_request_host = host is True or (
        host is None and (scheme or port is not None))
    if wants_request_host:
        host = request.env.http_host
    if scheme is True or not scheme:
        if request:
            scheme = request.env.get('wsgi_url_scheme', 'http').lower()
        else:
            scheme = 'http'
    if not host:
        return url

    if port:
        # an explicit port replaces whatever port the host string carries
        netloc = host.split(':', 1)[0] + ':%s' % port
    else:
        netloc = host
    return '%s://%s%s' % (scheme, netloc, url)
213
def try_rewrite_on_error(http_response, request, environ, ticket=None):
    """
    Called from main.wsgibase to rewrite the http response.

    Looks up the first routes_onerror entry whose key matches the request's
    application and the response status ('app/status', 'app/*', '*/status'
    or '*/*') and returns a ``(response, environ)`` pair:

    - ``(http_response, environ)`` unchanged when nothing applies or the
      matching target is '!';
    - ``(HTTP(303, ...), environ)`` when the target is an absolute
      http(s) URL;
    - ``(None, environ)`` with PATH_INFO, QUERY_STRING and
      WEB2PY_STATUS_CODE rewritten in ``environ`` for a local target.
    """
    status = int(str(http_response.status).split()[0])
    # error statuses start at 400; same threshold as try_redirect_on_error
    # and filter_err
    if status > 399 and THREAD_LOCAL.routes.routes_onerror:
        keys = set(('%s/%s' % (request.application, status),
                    '%s/*' % (request.application),
                    '*/%s' % (status),
                    '*/*'))
        for (key, uri) in THREAD_LOCAL.routes.routes_onerror:
            if key in keys:
                if uri == '!':
                    # do nothing!
                    return http_response, environ
                elif '?' in uri:
                    path_info, query_string = uri.split('?', 1)
                    query_string += '&'
                else:
                    path_info, query_string = uri, ''
                query_string += \
                    'code=%s&ticket=%s&requested_uri=%s&request_url=%s' % \
                    (status, ticket, urllib.quote_plus(
                        request.env.request_uri), request.url)
                if uri.startswith('http://') or uri.startswith('https://'):
                    # make up a response
                    url = path_info + '?' + query_string
                    message = 'You are being redirected <a href="%s">here</a>'
                    return HTTP(303, message % url, Location=url), environ
                elif not environ.get('__ROUTES_ONERROR__', False):
                    # wsgibase will be called recursively with
                    # the routes_onerror path.
                    environ['__ROUTES_ONERROR__'] = True  # limit recursion
                    path_info = '/' + path_info.lstrip('/')
                    environ['PATH_INFO'] = path_info
                    environ['QUERY_STRING'] = query_string
                    environ['WEB2PY_STATUS_CODE'] = status
                    return None, environ
    # do nothing!
    return http_response, environ
255
def try_redirect_on_error(http_object, request, ticket=None):
    """Called from main.wsgibase to rewrite the http response.

    Returns an HTTP 303 redirect to the first matching routes_onerror
    target, or ``http_object`` unchanged when the status is not above 399,
    no entry matches, or the matching target is '!'.
    """
    status = int(str(http_object.status).split()[0])
    if status <= 399 or not THREAD_LOCAL.routes.routes_onerror:
        return http_object

    candidates = set(('%s/%s' % (request.application, status),
                      '%s/*' % (request.application),
                      '*/%s' % (status),
                      '*/*'))
    for (key, redir) in THREAD_LOCAL.routes.routes_onerror:
        if key not in candidates:
            continue
        if redir == '!':
            break
        # extend an existing query string, or start a new one
        joiner = '&' if '?' in redir else '?'
        details = 'code=%s&ticket=%s&requested_uri=%s&request_url=%s' % (
            status, ticket,
            urllib.quote_plus(request.env.request_uri),
            request.url)
        url = '%s%s%s' % (redir, joiner, details)
        return HTTP(303, 'You are being redirected <a href="%s">here</a>' % url, Location=url)
    return http_object
281
def load(routes='routes.py', app=None, data=None, rdict=None):
    """
    load: read (if file) and parse routes
    store results in params
    (called from main.py at web2py initialization time)
    If data is present, it's used instead of the routes.py contents.
    If rdict is present, it must be a dict to be used for routers (unit test)

    With app=None the module-level state (params, params_apps, routers) is
    reset and the base routes file is loaded, then load() calls itself once
    per application that has its own routes file. With app set, only that
    application's parameters are stored in params_apps[app].

    Returns None. Returns early, without logging, when the routes file does
    not exist. Re-raises SyntaxError from a malformed routes file and raises
    SyntaxError when an app router contains a BASE-only key.
    """
    global params
    global routers
    if app is None:
        # reinitialize
        global params_apps
        params_apps = dict()
        params = _params_default(app=None)  # regex rewrite parameters
        THREAD_LOCAL.routes = params  # default to base regex rewrite parameters
        routers = None

    if isinstance(rdict, dict):
        # routers supplied directly: nothing to read or execute
        symbols = dict(routers=rdict)
        path = 'rdict'
    else:
        if data is not None:
            path = 'routes'
        else:
            if app is None:
                path = abspath(routes)
            else:
                path = abspath('applications', app, routes)
            if not exists(path):
                return
            data = read_file(path).replace('\r\n', '\n')

        # NOTE: the routes source is executed as Python code; 'symbols'
        # collects the names it defines ('app' is pre-seeded for it to read)
        symbols = dict(app=app)
        try:
            exec (data + '\n') in symbols
        except SyntaxError, e:
            logger.error(
                '%s has a syntax error and will not be loaded\n' % path
                + traceback.format_exc())
            raise e

    p = _params_default(app)

    # routes_app/in/out entries are compiled; each item is the argument
    # tuple for compile_regex
    for sym in ('routes_app', 'routes_in', 'routes_out'):
        if sym in symbols:
            for items in symbols[sym]:
                p[sym].append(compile_regex(*items))
    # the remaining recognised symbols are copied verbatim
    for sym in ('routes_onerror', 'routes_apps_raw',
                'error_handler', 'error_message', 'error_message_ticket',
                'default_application', 'default_controller', 'default_function',
                'logging'):
        if sym in symbols:
            p[sym] = symbols[sym]
    if 'routers' in symbols:
        # wrap the routers dict, and each router dict inside it, in Storage
        p.routers = Storage(symbols['routers'])
        for key in p.routers:
            if isinstance(p.routers[key], dict):
                p.routers[key] = Storage(p.routers[key])

    if app is None:
        params = p  # install base rewrite parameters
        THREAD_LOCAL.routes = params  # install default as current routes
        #
        #  create the BASE router if routers in use
        #
        routers = params.routers  # establish routers if present
        if isinstance(routers, dict):
            routers = Storage(routers)
        if routers is not None:
            # user-supplied BASE settings override the built-in defaults
            router = _router_default()
            if routers.BASE:
                router.update(routers.BASE)
            routers.BASE = router

        #  scan each app in applications/
        #    create a router, if routers are in use
        #    parse the app-specific routes.py if present
        #
        all_apps = []
        apppath = abspath('applications')
        for appname in os.listdir(apppath):
            # an app is a non-hidden directory that has a controllers/ subdir
            if not appname.startswith('.') and \
                    isdir(abspath(apppath, appname)) and \
                    isdir(abspath(apppath, appname, 'controllers')):
                all_apps.append(appname)
                if routers:
                    router = Storage(routers.BASE)  # new copy
                    if appname in routers:
                        for key in routers[appname].keys():
                            if key in ROUTER_BASE_KEYS:
                                raise SyntaxError("BASE-only key '%s' in router '%s'" % (key, appname))
                        router.update(routers[appname])
                    routers[appname] = router
                if exists(abspath('applications', appname, routes)):
                    # recursive call: loads the app-specific routes file
                    load(routes, appname)

        if routers:
            load_routers(all_apps)

    else:  # app
        params_apps[app] = p
        if routers and p.routers:
            # merge the app's own router settings into the global router
            if app in p.routers:
                routers[app].update(p.routers[app])

    log_rewrite('URL rewrite is on. configuration in %s' % path)
390
def compile_regex(k, v, env=None):
    """
    Preprocess and compile the regular expressions in routes_app/in/out
    The resulting regex will match a pattern of the form::

        [remote address]:[protocol]://[host]:[method] [path]

    We allow abbreviated regexes on input; here we try to complete them.

    ``k`` is the (possibly abbreviated) pattern, ``v`` the replacement and
    ``env`` an optional dict handed back unchanged to the caller.
    '$name' placeholders become named groups in ``k`` and '\\g<name>'
    references in ``v``; a backslash-escaped '\\$name' is left alone.

    Returns ``(compiled_pattern, replacement, env or {})``.
    Raises SyntaxError for an empty pattern or a path without a leading '/'.
    """
    k0 = k  # original k for error reporting
    if not k:
        # an empty pattern has no first/last character to inspect
        raise SyntaxError("routes pattern syntax error: empty pattern")
    # bracket regex in ^...$ if not already done
    if not k.startswith('^'):
        k = '^%s' % k
    if not k.endswith('$'):
        k = '%s$' % k
    # if there are no :-separated parts, prepend a catch-all for the IP address
    if k.find(':') < 0:
        # k = '^.*?:%s' % k[1:]
        k = '^.*?:https?://[^:/]+:[a-z]+ %s' % k[1:]
    # if there's no ://, provide a catch-all for the protocol, host & method
    if k.find('://') < 0:
        i = k.find(':/')
        if i < 0:
            raise SyntaxError("routes pattern syntax error: path needs leading '/' [%s]" % k0)
        k = r'%s:https?://[^:/]+:[a-z]+ %s' % (k[:i], k[i + 1:])
    # Substitute each placeholder at the exact position it was matched.
    # A plain str.replace per placeholder would also rewrite longer names
    # sharing the prefix ('$a' inside '$app') and escaped '\$name' text.
    # Callables are used as replacements so that re.sub performs no
    # backslash processing on the replacement text.
    # $anything -> ?P<anything>.*
    k = regex_anything.sub(lambda m: '(?P<anything>.*)', k)
    # $a (etc) -> ?P<a>\w+
    k = regex_at.sub(lambda m: r'(?P<%s>\w+)' % m.group(0)[1:], k)
    # same for replacement pattern, but with \g
    v = regex_at.sub(lambda m: r'\g<%s>' % m.group(0)[1:], v)
    return (re.compile(k, re.DOTALL), v, env or {})
427
def load_routers(all_apps):
    """Load-time post-processing of routers

    Normalises every router in the module-level ``routers`` in place:
    validates keys, converts controllers/languages/functions to sets,
    compiles the *_match patterns and rewrites BASE.domains into a dict
    keyed by (domain, port) tuples.

    ``all_apps`` is a list of application names; routers for apps not yet
    in it are appended to it. Raises SyntaxError for BASE-only keys in an
    app router, unknown router keys, or an unknown app in BASE.domains.
    """

    for app in routers:
        # initialize apps with routers that aren't present,
        # on behalf of unit tests
        if app not in all_apps:
            all_apps.append(app)
            router = Storage(routers.BASE)  # new copy
            if app != 'BASE':
                keys = set(routers[app]).intersection(ROUTER_BASE_KEYS)
                if keys:
                    raise SyntaxError("BASE-only key(s) %s in router '%s'" % (
                        tuple(keys), app))
            router.update(routers[app])
            routers[app] = router
        router = routers[app]
        keys = set(router).difference(ROUTER_KEYS)
        if keys:
            raise SyntaxError("unknown key(s) %s in router '%s'" % (
                tuple(keys), app))
        # controllers: a str value (e.g. 'DEFAULT') is kept as is for now,
        # anything else becomes a set
        if not router.controllers:
            router.controllers = set()
        elif not isinstance(router.controllers, str):
            router.controllers = set(router.controllers)
        if router.languages:
            router.languages = set(router.languages)
        else:
            router.languages = set()
        if router.functions:
            # a flat collection of function names is attached to the
            # default controller; dict values are converted to sets
            if isinstance(router.functions, (set, tuple, list)):
                functions = set(router.functions)
                if isinstance(router.default_function, str):
                    functions.add(
                        router.default_function)  # legacy compatibility
                router.functions = {router.default_controller: functions}
            for controller in router.functions:
                router.functions[controller] = set(
                    router.functions[controller])
        else:
            router.functions = dict()
        if app != 'BASE':
            # drop the BASE-only settings inherited from the BASE copy
            for base_only in ROUTER_BASE_KEYS:
                router.pop(base_only, None)
            if 'domain' in router:
                # NOTE(review): _router_default() sets BASE.domains to None,
                # so this assignment appears to need routes.py to have
                # supplied a BASE 'domains' dict - confirm
                routers.BASE.domains[router.domain] = app
            if isinstance(router.controllers, str) and router.controllers == 'DEFAULT':
                # 'DEFAULT' means: every .py file in the app's controllers/
                router.controllers = set()
                if isdir(abspath('applications', app)):
                    cpath = abspath('applications', app, 'controllers')
                    for cname in os.listdir(cpath):
                        if isfile(abspath(cpath, cname)) and cname.endswith('.py'):
                            router.controllers.add(cname[:-3])
            if router.controllers:
                router.controllers.add('static')
                router.controllers.add(router.default_controller)

    # 'ALL' expands to every known application
    if isinstance(routers.BASE.applications, str) and routers.BASE.applications == 'ALL':
        routers.BASE.applications = list(all_apps)
    if routers.BASE.applications:
        routers.BASE.applications = set(routers.BASE.applications)
    else:
        routers.BASE.applications = set()

    for app in routers.keys():
        # set router name
        router = routers[app]
        router.name = app
        # compile URL validation patterns
        router._acfe_match = re.compile(router.acfe_match)
        router._file_match = re.compile(router.file_match)
        if router.args_match:
            router._args_match = re.compile(router.args_match)
        # convert path_prefix to a list of path elements
        if router.path_prefix:
            if isinstance(router.path_prefix, str):
                router.path_prefix = router.path_prefix.strip('/').split('/')

    #  rewrite BASE.domains as tuples
    #
    #      key:    'domain[:port]' -> (domain, port)
    #      value:  'application[/controller[/function]]'
    #                  -> (application, controller, function)
    #      (port, controller and function may be None)
    #
    domains = dict()
    if routers.BASE.domains:
        for (d, a) in routers.BASE.domains.iteritems():
            (domain, app) = (d.strip(':'), a.strip('/'))
            if ':' in domain:
                (domain, port) = domain.split(':')
            else:
                port = None
            if '/' in app:
                (app, ctlr) = app.split('/', 1)
            else:
                ctlr = None
            if ctlr and '/' in ctlr:
                (ctlr, fcn) = ctlr.split('/')
            else:
                fcn = None
            if app not in all_apps and app not in routers:
                raise SyntaxError("unknown app '%s' in domains" % app)
            domains[(domain, port)] = (app, ctlr, fcn)
    routers.BASE.domains = domains
533
def regex_uri(e, regexes, tag, default=None):
    """Filter the incoming URI against a list of regexes.

    Builds the key 'remote_addr:scheme://host:method path' from the environ
    ``e`` and returns the substitution produced by the first entry of
    ``regexes`` that matches it (after merging that entry's custom env into
    ``e``), or ``default`` when none matches. ``tag`` only labels the log
    output.
    """
    host = e.get('HTTP_HOST', e.get('SERVER_NAME', 'localhost')).lower()
    colon = host.find(':')
    if colon > 0:
        # drop the port
        host = host[:colon]
    remote = e.get('REMOTE_ADDR', 'localhost')
    scheme = e.get('wsgi.url_scheme', 'http').lower()
    method = e.get('REQUEST_METHOD', 'get').lower()
    key = '%s:%s://%s:%s %s' % (remote, scheme, host, method, e['PATH_INFO'])

    for (regex, value, custom_env) in regexes:
        if not regex.match(key):
            continue
        e.update(custom_env)
        rewritten = regex.sub(value, key)
        log_rewrite('%s: [%s] [%s] -> %s' % (tag, key, value, rewritten))
        return rewritten

    log_rewrite('%s: [%s] -> %s (not rewritten)' % (tag, key, default))
    return default
554
def regex_select(env=None, app=None, request=None):
    """
    Selects a set of regex rewrite params for the current request

    An explicit ``app`` selects that application's parameters; otherwise,
    when ``env`` is given and routes_app is configured, the application is
    resolved from the request; otherwise the base parameters are installed.
    Returns the application name (possibly None).
    """
    if not app and env and params.routes_app:
        if routers:
            # no parameters are installed directly on this path
            map_url_in(request, env, app=True)
        else:
            app = regex_uri(env, params.routes_app, "routes_app")
            THREAD_LOCAL.routes = params_apps.get(app, params)
    elif app:
        THREAD_LOCAL.routes = params_apps.get(app, params)
    else:
        THREAD_LOCAL.routes = params  # default to base rewrite parameters
    log_rewrite("select routing parameters: %s" % THREAD_LOCAL.routes.name)
    return app  # for doctest
572
def regex_filter_in(e):
    """Regex-rewrite the incoming URL held in the environ dict ``e``.

    Records the original URI in WEB2PY_ORIGINAL_URI, applies routes_in (if
    any) to PATH_INFO, merges a query string introduced by the rewrite in
    front of the original one, and refreshes REQUEST_URI. Raises HTTP when
    the rewritten path has the 'NNN->location' redirect form. Returns ``e``.
    """
    routes = THREAD_LOCAL.routes
    query = e.get('QUERY_STRING', None)
    e['WEB2PY_ORIGINAL_URI'] = e['PATH_INFO'] + ('?' + query if query else '')
    if routes.routes_in:
        path = regex_uri(e, routes.routes_in,
                         "routes_in", e['PATH_INFO'])
        redirect = regex_redirect.match(path)
        if redirect:
            code, location = redirect.group(1), redirect.group(2)
            raise HTTP(int(code), location=location)
        new_path, found, new_query = path.partition('?')
        e['PATH_INFO'] = new_path
        if found:
            # query produced by the rewrite goes before the original query
            if query:
                query = new_query + '&' + query
            else:
                query = new_query
            e['QUERY_STRING'] = query
    e['REQUEST_URI'] = e['PATH_INFO'] + ('?' + query if query else '')
    return e
595
def sluggify(key):
    """Return ``key`` lower-cased with every '.' turned into '_'."""
    lowered = key.lower()
    return '_'.join(lowered.split('.'))
599
def invalid_url(routes):
    """Raise HTTP 400 ('invalid request') using the error page from ``routes``."""
    body = routes.error_message % 'invalid request'
    raise HTTP(400, body, web2py_error='invalid path')
604
def regex_url_in(request, environ):
    """Rewrite and parse an incoming URL (regex-based routing).

    Selects the rewrite parameters, applies routes_in, copies the environ
    into request.env (lower-cased keys, '.' replaced by '_') and parses the
    path.

    Returns ``(static_file, version, environ)`` for a static request, or
    ``(None, None, environ)`` after filling in request.application,
    .controller, .function, .extension and .args for a dynamic one.
    Raises HTTP 400 for an invalid path (including a static path that
    resolves outside the application's static folder) and HTTP 404 for a
    static URL without a file name.
    """

    # ##################################################
    # select application
    # rewrite URL if routes_in is defined
    # update request.env
    # ##################################################

    regex_select(env=environ, request=request)
    routes = THREAD_LOCAL.routes
    if routes.routes_in:
        environ = regex_filter_in(environ)
    request.env.update(
        (k.lower().replace('.', '_'), v) for k, v in environ.iteritems())

    # ##################################################
    # serve if a static file
    # ##################################################

    path = urllib.unquote(request.env.path_info) or '/'
    path = path.replace('\\', '/')
    if path.endswith('/') and len(path) > 1:
        path = path[:-1]
    match = regex_url.match(path)
    if not match:
        invalid_url(routes)
    request.raw_args = (match.group('s') or '')
    if request.raw_args.startswith('/'):
        request.raw_args = request.raw_args[1:]
    if match.group('c') == 'static':
        application = match.group('a')
        # group 'z' is None for a bare '/app/static' URL
        version, filename = None, (match.group('z') or '').replace(' ', '_')
        if not filename:
            raise HTTP(404)
        items = filename.split('/', 1)
        # only treat the first element as a version when a file follows it
        if len(items) == 2 and regex_version.match(items[0]):
            version, filename = items
        static_folder = os.path.abspath(
            pjoin(request.env.applications_parent,
                  'applications', application, 'static'))
        static_file = os.path.abspath(pjoin(static_folder, filename))
        # require a path separator after the folder name: a bare prefix test
        # would also accept sibling directories such as 'static_private'
        if not static_file.startswith(static_folder + os.sep):
            invalid_url(routes)
        return (static_file, version, environ)
    else:
        # ##################################################
        # parse application, controller and function
        # ##################################################
        request.application = match.group('a') or routes.default_application
        request.controller = match.group('c') or routes.default_controller
        request.function = match.group('f') or routes.default_function
        request.raw_extension = match.group('e')
        request.extension = request.raw_extension or 'html'
        if request.application in routes.routes_apps_raw:
            # application is responsible for parsing args
            request.args = None
        elif request.raw_args:
            # characters not allowed in args are replaced by '_'
            args = regex_args.sub('_', request.raw_args)
            request.args = List(args.split('/'))
        else:
            request.args = List([])
    return (None, None, environ)
667
def regex_filter_out(url, e=None):
    """Regex-rewrite an outgoing URL.

    Returns ``url`` unchanged when routers are in use or no routes_out entry
    matches; otherwise returns the substitution from the first matching
    entry, with any query string re-attached. ``e`` is an optional
    request.env-style mapping (lower-case keys) used to build the match key.
    """
    if not hasattr(THREAD_LOCAL, 'routes'):
        regex_select()  # ensure routes is set (for application threads)
    routes = THREAD_LOCAL.routes
    if routers:
        return url  # already filtered
    if not routes.routes_out:
        log_rewrite('routes_out: [%s] not rewritten' % url)
        return url

    items = url.split('?', 1)
    if e:
        host = e.get('http_host', 'localhost').lower()
        colon = host.find(':')
        if colon > 0:
            host = host[:colon]
        remote = e.get('remote_addr', '')
        scheme = e.get('wsgi_url_scheme', 'http').lower()
        method = e.get('request_method', 'get').lower()
        items[0] = '%s:%s://%s:%s %s' % (remote, scheme, host, method, items[0])
    else:
        items[0] = ':http://localhost:get %s' % items[0]

    for (regex, value, _unused_env) in routes.routes_out:
        if not regex.match(items[0]):
            continue
        rewritten = '?'.join([regex.sub(value, items[0])] + items[1:])
        log_rewrite('routes_out: [%s] -> %s' % (url, rewritten))
        return rewritten

    log_rewrite('routes_out: [%s] not rewritten' % url)
    return url
696
def filter_url(url, method='get', remote='0.0.0.0',
               out=False, app=False, lang=None,
               domain=(None, None), env=False, scheme=None,
               host=None, port=None, language=None):
    """
    doctest/unittest interface to regex_filter_in() and regex_filter_out()

    ``url`` must be a full http(s) URL (it is parsed with regex_full_url).
    Three modes, checked in this order:

    - ``app`` true: return only the application selected for the URL;
    - ``out`` true: treat the URL path as /a/c/f[/args] and return the
      rewritten outbound URL;
    - otherwise: rewrite the URL inbound and return either the static file
      path, request.env (when ``env`` is true) or a summary string of the
      form '/a/c/f[.ext] [args] [?query] [(language)]'.
    """
    match = regex_full_url.match(url)
    urlscheme = match.group('scheme').lower()
    urlhost = match.group('host').lower()
    uri = match.group('uri')
    # split uri into path and query at the first '?'
    k = uri.find('?')
    if k < 0:
        k = len(uri)
    if isinstance(domain, str):
        domain = (domain, None)
    (path_info, query_string) = (uri[:k], uri[k + 1:])
    path_info = urllib.unquote(path_info)  # simulate server
    e = {
        'REMOTE_ADDR': remote,
        'REQUEST_METHOD': method,
        'wsgi.url_scheme': urlscheme,
        'HTTP_HOST': urlhost,
        'REQUEST_URI': uri,
        'PATH_INFO': path_info,
        'QUERY_STRING': query_string,
        #for filter_out request.env use lowercase
        'remote_addr': remote,
        'request_method': method,
        'wsgi_url_scheme': urlscheme,
        'http_host': urlhost
    }

    # minimal stand-in for the request object used by the rewrite functions
    request = Storage()
    e["applications_parent"] = global_settings.applications_parent
    request.env = Storage(e)
    request.uri_language = lang

    #  determine application only
    #
    if app:
        if routers:
            return map_url_in(request, e, app=True)
        return regex_select(e)

    #  rewrite outbound URL
    #
    if out:
        (request.env.domain_application,
         request.env.domain_controller) = domain
        items = path_info.lstrip('/').split('/')
        if items[-1] == '':
            items.pop()  # adjust trailing empty args
        assert len(items) >= 3, "at least /a/c/f is required"
        a = items.pop(0)
        c = items.pop(0)
        f = items.pop(0)
        # whatever is left in items after a/c/f are the args
        if not routers:
            return regex_filter_out(uri, e)
        acf = map_url_out(
            request, None, a, c, f, items, None, scheme, host, port, language=language)
        if items:
            url = '%s/%s' % (acf, '/'.join(items))
            if items[-1] == '':
                url += '/'
        else:
            url = acf
        if query_string:
            url += '?' + query_string
        return url

    #  rewrite inbound URL
    #
    (static, version, e) = url_in(request, e)
    if static:
        return static
    result = "/%s/%s/%s" % (
        request.application, request.controller, request.function)
    if request.extension and request.extension != 'html':
        result += ".%s" % request.extension
    if request.args:
        result += " %s" % request.args
    if e['QUERY_STRING']:
        result += " ?%s" % e['QUERY_STRING']
    if request.uri_language:
        result += " (%s)" % request.uri_language
    if env:
        return request.env
    return result
787
def filter_err(status, application='app', ticket='tkt'):
    """doctest/unittest interface to routes_onerror

    Returns the redirection URL for the first matching routes_onerror entry
    (with code and ticket appended as query parameters), or ``status``
    itself when no action applies.
    """
    routes = THREAD_LOCAL.routes
    if status <= 399 or not routes.routes_onerror:
        return status  # no action

    candidates = set(('%s/%s' % (application, status),
                      '%s/*' % (application),
                      '*/%s' % (status),
                      '*/*'))
    for (key, redir) in routes.routes_onerror:
        if key not in candidates:
            continue
        if redir == '!':
            break
        # extend an existing query string, or start a new one
        joiner = '&' if '?' in redir else '?'
        return redir + joiner + 'code=%s&ticket=%s' % (status, ticket)  # redirection
    return status  # no action
807
808 # router support 809 # 810 811 812 -class MapUrlIn(object):
813 "Logic for mapping incoming URLs" 814
def __init__(self, request=None, env=None):
    """Initialize a map-in object from the request and its WSGI environ.

    Normalizes PATH_INFO in ``env`` to a single leading '/', records
    WEB2PY_ORIGINAL_URI, splits the path into ``self.args`` and derives
    remote address, scheme, method, host and port from ``env``.
    """
    self.request = request
    self.env = env

    # routing results, filled in by the map_* methods
    self.router = None
    self.application = None
    self.language = None
    self.controller = None
    self.function = None
    self.extension = 'html'

    # per-application settings, replaced once the application is known
    self.controllers = set()
    self.functions = dict()
    self.languages = set()
    self.default_language = None
    self.map_hyphen = False
    self.exclusive_domain = False

    raw_path = env['PATH_INFO']
    self.query = env.get('QUERY_STRING', None)
    path = raw_path.lstrip('/')
    env['PATH_INFO'] = '/' + path
    if self.query:
        env['WEB2PY_ORIGINAL_URI'] = env['PATH_INFO'] + '?' + self.query
    else:
        env['WEB2PY_ORIGINAL_URI'] = env['PATH_INFO'] + ''

    # to handle empty args, strip exactly one trailing slash, if present
    # .../arg1// represents one trailing empty arg
    #
    if path.endswith('/'):
        path = path[:-1]
    self.args = List(path.split('/') if path else [])

    # see http://www.python.org/dev/peps/pep-3333/#url-reconstruction for URL composition
    self.remote_addr = env.get('REMOTE_ADDR', 'localhost')
    self.scheme = env.get('wsgi.url_scheme', 'http').lower()
    self.method = env.get('REQUEST_METHOD', 'get').lower()

    host, port = env.get('HTTP_HOST'), None
    if not host:
        host, port = env.get('SERVER_NAME'), env.get('SERVER_PORT')
    if not host:
        host, port = 'localhost', '80'
    if ':' in host:
        host, port = host.rsplit(':', 1)  # for ipv6 support
    if not port:
        port = '443' if self.scheme == 'https' else '80'
    self.host, self.port = host, port
862
def map_prefix(self):
    """Strip the BASE path_prefix from self.args, if present in its entirety."""
    prefix = routers.BASE.path_prefix
    if not prefix:
        return
    count = len(prefix)
    # a slice shorter than the prefix can never compare equal, which also
    # covers the case of fewer args than prefix elements
    if list(self.args[:count]) != list(prefix):
        return  # prefix didn't match
    self.args = List(self.args[count:])  # strip the prefix
874
def map_app(self):
    """Determine the application name and install its router settings.

    The first path element wins unless BASE.exclusive_domain is set; next
    come the (host, port) and (host, None) entries of BASE.domains, then
    the first path element again, then BASE.default_application.
    Raises HTTP 400 for an invalid or unknown application name.
    """
    base = routers.BASE  # base router
    self.domain_application = None
    self.domain_controller = None
    self.domain_function = None
    arg0 = self.harg0

    # does the first path element name an application?
    if base.applications:
        arg0_is_app = arg0 in base.applications
    else:
        arg0_is_app = bool(arg0)

    domain_key = None
    if arg0_is_app and not base.exclusive_domain:
        self.application = arg0
    elif (self.host, self.port) in base.domains:
        domain_key = (self.host, self.port)
    elif (self.host, None) in base.domains:
        domain_key = (self.host, None)
    elif arg0_is_app:
        self.application = arg0
    else:
        self.application = base.default_application or ''

    if domain_key is not None:
        # application (and optionally controller/function) come from the domain
        (self.application, self.domain_controller,
         self.domain_function) = base.domains[domain_key]
        self.env['domain_application'] = self.application
        self.env['domain_controller'] = self.domain_controller
        self.env['domain_function'] = self.domain_function
    self.pop_arg_if(self.application == arg0)

    if not base._acfe_match.match(self.application):
        raise HTTP(
            400, THREAD_LOCAL.routes.error_message % 'invalid request',
            web2py_error="invalid application: '%s'" % self.application)

    has_router = self.application in routers
    if not has_router and \
            (self.application != THREAD_LOCAL.routes.default_application or self.application == 'welcome'):
        raise HTTP(
            400, THREAD_LOCAL.routes.error_message % 'invalid request',
            web2py_error="unknown application: '%s'" % self.application)

    #  set the application router
    #
    log_rewrite("select application=%s" % self.application)
    self.request.application = self.application
    if has_router:
        router = routers[self.application]  # application router
    else:
        router = routers.BASE  # support gluon.main.wsgibase init->welcome
    self.router = router
    self.controllers = router.controllers
    self.default_controller = self.domain_controller or router.default_controller
    self.functions = router.functions
    self.languages = router.languages
    self.default_language = router.default_language
    self.map_hyphen = router.map_hyphen
    self.exclusive_domain = router.exclusive_domain
    self._acfe_match = router._acfe_match
    self.file_match = router.file_match
    self._file_match = router._file_match
    self._args_match = router._args_match
936
def map_root_static(self):
    """
    Handles root-static files (no hyphen mapping)

    A root-static file is one whose incoming URL expects it at the root
    of the site, typically robots.txt & favicon.ico.

    Returns (path, None) when the only remaining arg is listed in the
    router's root_static, otherwise (None, None). On a hit the controller
    of both this object and the request is set to 'static'.
    """
    is_root_static = (len(self.args) == 1 and
                      self.arg0 in self.router.root_static)
    if not is_root_static:
        return None, None

    self.controller = 'static'
    self.request.controller = 'static'
    located = pjoin(self.request.env.applications_parent,
                    'applications', self.application,
                    self.controller, self.arg0)
    log_rewrite("route: root static=%s" % located)
    return located, None
953
def map_language(self):
    """
    Handles language (no hyphen mapping)

    Sets self.language to the first arg when that arg is one of
    self.languages, otherwise to self.default_language. When a language
    ends up selected it is logged, and the first arg is popped if it was
    the language token itself.
    """
    arg0 = self.arg0  # no hyphen mapping
    if arg0 and self.languages and arg0 in self.languages:
        self.language = arg0
    else:
        self.language = self.default_language
    if self.language:
        log_rewrite("route: language=%s" % self.language)
        # only consume the arg when it really was the language
        self.pop_arg_if(self.language == arg0)
965
def map_controller(self):
    """
    Identifies controller

    Uses the first arg (hyphens mapped) as the controller unless it is
    missing or not among the known controllers, in which case the default
    controller (or '') is used. The arg is popped when it was taken as the
    controller. Raises HTTP 400 if the controller name fails validation.
    """
    candidate = self.harg0  # map hyphens
    if candidate and not (self.controllers and
                          candidate not in self.controllers):
        self.controller = candidate
    else:
        self.controller = self.default_controller or ''
    self.pop_arg_if(candidate == self.controller)
    log_rewrite("route: controller=%s" % self.controller)
    if self.router._acfe_match.match(self.controller):
        return
    raise HTTP(
        400, THREAD_LOCAL.routes.error_message % 'invalid request',
        web2py_error='invalid controller')
981
def map_static(self):
    """
    Handles static files
    file_match but no hyphen mapping

    Returns (None, None) when the controller is not 'static'. Otherwise
    returns (static_file, version): the path of the file to serve and the
    regex_version match for a leading version element (None if absent).

    Raises HTTP 400 when no file name is given or when the path (or any
    of its elements) fails the router's file_match validation.
    """
    if self.controller != 'static':
        return None, None
    # Probe for a version prefix only when a first arg exists: with no
    # args, self.args(0) is presumably None and the regex match would
    # raise instead of letting the "require a file name" check below
    # report a 400.
    version = regex_version.match(self.args(0)) if self.args else None
    if version:
        filename = '/'.join(self.args[1:])
    else:
        filename = '/'.join(self.args)
    if len(self.args) == 0:
        bad_static = True   # require a file name
    elif '/' in self.file_match:
        # match the path
        bad_static = not self.router._file_match.match(filename)
    else:
        # match path elements
        bad_static = any(
            name in ('', '.', '..') or
            not self.router._file_match.match(name)
            for name in self.args)
    if bad_static:
        log_rewrite('bad static path=%s' % filename)
        raise HTTP(400,
                   THREAD_LOCAL.routes.error_message % 'invalid request',
                   web2py_error='invalid static file')
    #
    # support language-specific static subdirectories,
    # eg /appname/en/static/filename => applications/appname/static/en/filename
    # if language-specific file doesn't exist, try same file in static
    #
    if self.language:
        static_file = pjoin(self.request.env.applications_parent,
                            'applications', self.application,
                            'static', self.language, filename)
    if not self.language or not isfile(static_file):
        static_file = pjoin(self.request.env.applications_parent,
                            'applications', self.application,
                            'static', filename)
    self.extension = None
    log_rewrite("route: static=%s" % static_file)
    return static_file, version
1026
def map_function(self):
    """
    Handles function.extension

    Takes the first arg (hyphens mapped) as "function[.extension]" unless
    it is missing or not among the functions known for the controller; in
    that case the default function is used (the domain function first,
    then the router default, which may be a per-controller dict).
    Raises HTTP 400 if the function or the extension fails validation.
    """
    token = self.harg0  # map hyphens
    known = self.functions.get(self.controller, set())
    configured = self.router.default_function  # dict, str or None
    if isinstance(configured, dict):
        configured = configured.get(self.controller, None)
    fallback = self.domain_function or configured

    if token and not (known and token not in known):
        pieces = token.split('.')
        self.function = pieces[0]
        if len(pieces) > 1:
            self.extension = pieces[-1]
        self.pop_arg_if(True)
    else:
        self.function = fallback or ""
        self.pop_arg_if(token and self.function == token)
    log_rewrite(
        "route: function.ext=%s.%s" % (self.function, self.extension))

    function_ok = self.router._acfe_match.match(self.function)
    if not function_ok:
        raise HTTP(
            400, THREAD_LOCAL.routes.error_message % 'invalid request',
            web2py_error='invalid function')
    if self.extension:
        if not self.router._acfe_match.match(self.extension):
            raise HTTP(
                400, THREAD_LOCAL.routes.error_message % 'invalid request',
                web2py_error='invalid extension')
1059
def validate_args(self):
    """
    Checks args against validation pattern

    Raises HTTP 400 for the first arg that the router's _args_match
    pattern does not match.
    """
    for candidate in self.args:
        if self.router._args_match.match(candidate):
            continue
        raise HTTP(
            400, THREAD_LOCAL.routes.error_message % 'invalid request',
            web2py_error='invalid arg <%s>' % candidate)
1069
def sluggify(self):
    """
    Copies every entry of self.env into self.request.env under a
    slugged key: lower-cased, with '.' replaced by '_'.
    """
    def slug(key):
        return key.lower().replace('.', '_')

    self.request.env.update(
        (slug(name), value) for name, value in self.env.iteritems())
1073
def update_request(self):
    """
    Updates request from self

    Copies application, controller, function, extension and args (and
    the language, if any) onto the request, rebuilds REQUEST_URI in
    self.env from those components, then calls sluggify().
    """
    req = self.request
    req.application = self.application
    req.controller = self.controller
    req.function = self.function
    req.extension = self.extension
    req.args = self.args
    if self.language:
        req.uri_language = self.language

    app = self.application
    path = '/%s/%s' % (self.controller, self.function)
    if self.map_hyphen:
        # hyphen mapping applies to a/c/f only, so do it before the
        # extension and language are attached
        path = path.replace('_', '-')
        app = app.replace('_', '-')
    if self.extension and self.extension != 'html':
        path = path + '.' + self.extension
    if self.language:
        path = '/%s%s' % (self.language, path)

    if self.args:
        tail = urllib.quote('/' + '/'.join(str(x) for x in self.args))
    else:
        tail = ''
    if self.query:
        query = '?' + self.query
    else:
        query = ''

    self.env['REQUEST_URI'] = '/%s%s%s%s' % (app, path, tail, query)
    self.sluggify()
@property
def arg0(self):
    """Returns first arg (as given by self.args(0)), without hyphen mapping"""
    first = self.args(0)
    return first
@property
def harg0(self):
    """
    Returns first arg with optional hyphen mapping

    When self.map_hyphen is set and there is a first arg, its '-'
    characters are replaced by '_'; otherwise it is returned unchanged.
    """
    first = self.args(0)
    if first and self.map_hyphen:
        return first.replace('-', '_')
    return first
1116
def pop_arg_if(self, dopop):
    """
    Conditionally removes first arg

    The first element of self.args is dropped when dopop is true;
    nothing is returned either way.
    """
    if not dopop:
        return
    self.args.pop(0)
1121
class MapUrlOut(object):
    """
    Logic for mapping outgoing URLs

    An instance captures the components of one outgoing URL together with
    the settings of the router that applies to it. acf() then decides
    which leading components (application, language, controller,
    function) may be left out and returns the resulting path string.
    """

    def __init__(self, request, env, application, controller,
                 function, args, other, scheme, host, port, language):
        """
        initialize a map-out object

        Stores the URL components, selects routers[application] (falling
        back to routers.BASE), copies the router settings used later, and
        resets all omit_* flags to False.

        Raises SyntaxError when the router is domain-exclusive, the
        request came in through a different domain application, and no
        host was supplied.
        """
        self.default_application = routers.BASE.default_application
        if application in routers:
            self.router = routers[application]
        else:
            self.router = routers.BASE
        self.request = request
        self.env = env
        self.application = application
        self.controller = controller
        self.is_static = (
            controller == 'static' or controller.startswith('static/'))
        self.function = function
        self.args = args
        self.other = other
        self.scheme = scheme
        self.host = host
        self.port = port
        self.language = language

        # applications and path_prefix always come from the BASE router;
        # the remaining settings come from the selected router
        self.applications = routers.BASE.applications
        self.controllers = self.router.controllers
        self.functions = self.router.functions.get(self.controller, set())
        self.languages = self.router.languages
        self.default_language = self.router.default_language
        self.exclusive_domain = self.router.exclusive_domain
        self.map_hyphen = self.router.map_hyphen
        self.map_static = self.router.map_static
        self.path_prefix = routers.BASE.path_prefix

        # domain mapping of the current request, if there is a request
        self.domain_application = request and self.request.env.domain_application
        self.domain_controller = request and self.request.env.domain_controller
        # default_function may be a per-controller dict or a single value
        if isinstance(self.router.default_function, dict):
            self.default_function = self.router.default_function.get(
                self.controller, None)
        else:
            self.default_function = self.router.default_function

        if (self.router.exclusive_domain and self.domain_application and self.domain_application != self.application and not self.host):
            raise SyntaxError('cross-domain conflict: must specify host')

        # explicit language wins over the request's uri_language; either
        # way it is kept only if the router lists it
        lang = self.language if self.language else request and request.uri_language
        if lang and self.languages and lang in self.languages:
            self.language = lang
        else:
            self.language = None

        self.omit_application = False
        self.omit_language = False
        self.omit_controller = False
        self.omit_function = False

    def omit_lang(self):
        "Omits language if possible (none set, or it is the default language)"

        if not self.language or self.language == self.default_language:
            self.omit_language = True

    def omit_acf(self):
        """
        Omits what we can of a/c/f

        Sets the omit_application / omit_controller / omit_function flags
        (and may reset omit_language for static URLs). The steps below
        first turn flags on for default components and then turn them
        back off for cases flagged as ambiguous; later steps depend on
        flags set by earlier ones.
        """

        router = self.router

        #  Handle the easy no-args case of tail-defaults: /a/c  /a  /
        #
        if not self.args and self.function == self.default_function:
            self.omit_function = True
            if self.controller == router.default_controller:
                self.omit_controller = True
                if self.application == self.default_application:
                    self.omit_application = True

        #  omit default application
        #  (which might be the domain default application)
        #
        default_application = self.domain_application or self.default_application
        if self.application == default_application:
            self.omit_application = True

        #  omit controller if default controller
        #
        default_controller = ((self.application == self.domain_application) and self.domain_controller) or router.default_controller or ''
        if self.controller == default_controller:
            self.omit_controller = True

        #  omit function if possible
        #
        if self.functions and self.function in self.functions and self.function == self.default_function:
            self.omit_function = True

        #  prohibit ambiguous cases
        #
        #  because we presume the lang string to be unambiguous, its presence protects application omission
        #
        if self.exclusive_domain:
            applications = [self.domain_application]
        else:
            applications = self.applications
        if self.omit_language:
            if not applications or self.controller in applications:
                self.omit_application = False
            if self.omit_application:
                if not applications or self.function in applications:
                    self.omit_controller = False
        if not self.controllers or self.function in self.controllers:
            self.omit_controller = False
        if self.args:
            if self.args[0] in self.functions or self.args[0] in self.controllers or self.args[0] in applications:
                self.omit_function = False
        if self.omit_controller:
            if self.function in self.controllers or self.function in applications:
                self.omit_controller = False
        if self.omit_application:
            if self.controller in applications:
                self.omit_application = False

        #  handle static as a special case
        #  (easier for external static handling)
        #
        if self.is_static:
            if not self.map_static:
                self.omit_application = False
                if self.language:
                    self.omit_language = False
            self.omit_controller = False
            self.omit_function = False

    def build_acf(self):
        """
        Builds a/c/f from components

        Concatenates the components whose omit_* flag is not set, prefixed
        by path_prefix when there is one. With map_hyphen set, the stored
        application, controller and (for non-static controllers) function
        are rewritten in place with '_' replaced by '-'.

        Returns the path string; when there are no args an empty result
        becomes '/'.
        """
        acf = ''
        if self.map_hyphen:
            self.application = self.application.replace('_', '-')
            self.controller = self.controller.replace('_', '-')
            if self.controller != 'static' and not self.controller.startswith('static/'):
                self.function = self.function.replace('_', '-')
        if not self.omit_application:
            acf += '/' + self.application
        # handle case of flipping lang/static/file to static/lang/file for external rewrite
        if self.is_static and self.map_static is False and not self.omit_language:
            acf += '/' + self.controller + '/' + self.language
        else:
            if not self.omit_language:
                acf += '/' + self.language
            if not self.omit_controller:
                acf += '/' + self.controller
        if not self.omit_function:
            acf += '/' + self.function
        if self.path_prefix:
            acf = '/' + '/'.join(self.path_prefix) + acf
        if self.args:
            return acf
        return acf or '/'

    def acf(self):
        """
        Converts components to /app/lang/controller/function

        Returns None when no routers are defined; otherwise runs
        omit_lang(), omit_acf() and returns the result of build_acf().
        """

        if not routers:
            return None         # use regex filter
        self.omit_lang()        # try to omit language
        self.omit_acf()         # try to omit a/c/f
        return self.build_acf()  # build and return the /a/lang/c/f string
1289
def map_url_in(request, env, app=False):
    """
    Routes incoming URL

    Builds a MapUrlIn object for the request and runs its mapping steps
    in order: prefix, application, root-static, language/controller,
    static, function, args validation.

    Args:
        request: the request object to be updated from the parsed URL
        env: the environment handed to MapUrlIn
        app: when true, stop after the application has been determined

    Returns:
        the application name when app is true; otherwise a tuple
        (static_file, version, env) in which static_file and version are
        None for non-static requests.
    """

    #  initialize router-url object
    #
    THREAD_LOCAL.routes = params  # default to base routes
    mapper = MapUrlIn(request=request, env=env)
    mapper.sluggify()
    mapper.map_prefix()  # strip prefix if present
    mapper.map_app()     # determine application

    #  configure THREAD_LOCAL.routes for error rewrite
    #
    if params.routes_app:
        # NOTE(review): app is the boolean flag of this function, not an
        # application name, so this lookup presumably always falls back to
        # params -- confirm whether mapper.application was intended.
        THREAD_LOCAL.routes = params_apps.get(app, params)

    if app:
        return mapper.application

    root_static_file, version = mapper.map_root_static(
    )  # handle root-static files
    if root_static_file:
        mapper.update_request()
        return (root_static_file, version, mapper.env)
    # handle mapping of lang/static to static/lang in externally-rewritten URLs
    # in case we have to handle them ourselves
    #
    # The setting is read from the router: mapper.map_static is the bound
    # method called further down, so comparing it with False could never
    # succeed and this branch was unreachable.
    if (mapper.languages and mapper.router.map_static is False and
            mapper.arg0 == 'static' and mapper.args(1) in mapper.languages):
        mapper.map_controller()
        mapper.map_language()
    else:
        mapper.map_language()
        mapper.map_controller()
    static_file, version = mapper.map_static()
    if static_file:
        mapper.update_request()
        return (static_file, version, mapper.env)
    mapper.map_function()
    mapper.validate_args()
    mapper.update_request()
    return (None, None, mapper.env)
1331
def map_url_out(request, env, application, controller,
                function, args, other, scheme, host, port, language=None):
    """
    Supply /a/c/f (or /a/lang/c/f) portion of outgoing url

    The governing rule: only transformations that map_url_in can reverse
    are allowed.

    Let the incoming arguments be a,c,f,args,lang and the router defaults
    be da, dc, df, dl.

    With args=[] and lang=None or dl these reductions are trivial::

        /da/dc/df => /
        /a/dc/df => /a
        /a/c/df => /a/c

    The default application, or application/controller, should also be
    stripped from URLs that carry function/args, thus::

        /da/c/f/args  => /c/f/args
        /da/dc/f/args => /f/args

    [applications], [controllers] and {functions} are used to suppress
    ambiguous omissions.

    Language names are assumed not to collide with a/c/f names.
    """
    return MapUrlOut(request, env, application, controller,
                     function, args, other, scheme, host, port,
                     language).acf()
1363
def get_effective_router(appname):
    """
    Returns a private copy of the effective router for the specified
    application, or None when no routers are defined or the application
    has no router.
    """
    if routers and appname in routers:
        return Storage(routers[appname])  # return a copy
    return None
1370