Package gluon :: Module highlight
[hide private]
[frames | no frames]

Source Code for Module gluon.highlight

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3   
  4  """ 
  5  | This file is part of the web2py Web Framework 
  6  | Copyrighted by Massimo Di Pierro <mdipierro@cs.depaul.edu> 
  7  | License: LGPLv3 (http://www.gnu.org/licenses/lgpl.html) 
  8  """ 
  9   
 10  import re 
 11  import cgi 
 12   
 13  __all__ = ['highlight'] 
 14   
 15   
try:
    # Python 3: cgi.escape was removed in 3.8, html.escape replaces it.
    from html import escape as _html_escape
except ImportError:
    # Python 2 fallback.
    from cgi import escape as _html_escape


def _escape(text):
    """Escape ``&``, ``<`` and ``>`` for HTML output, leaving quotes as-is.

    ``quote=False`` is passed explicitly because ``html.escape`` escapes
    quotes by default whereas the ``cgi.escape`` it replaces did not.
    """
    return _html_escape(text, quote=False)


class Highlighter(object):
    """Regex-driven syntax highlighter that renders source text as HTML.

    The text is scanned left to right.  At every position the rules of the
    current mode (see ``all_styles``) are tried in order; the first rule
    that matches hands its match to the mode's tokenizer callback, which
    appends escaped text wrapped in ``<span style="...">`` elements to
    ``self.output`` and may return the name of the mode to switch to.
    """

    # Requested mode -> (rule table to start in, token names to skip).
    _MODES = {
        'WEB2PY': ('PYTHON', ()),
        'PYTHON': ('PYTHON', ('GOTOHTML',)),
        'CPP': ('C', ()),
        'C': ('C', ('CPPKEYWORD',)),
        'HTML_PLAIN': ('HTML', ('GOTOPYTHON',)),
        'HTML': ('HTML', ()),
    }

    def __init__(
        self,
        mode,
        link=None,
        styles=None,
    ):
        """
        Initialize highlighter:
            mode = language (PYTHON, WEB2PY, C, CPP, HTML, HTML_PLAIN),
                   case-insensitive
            link = URL prefix used for the links generated for WEB2PY
                   tokens; a trailing '/' is appended when missing
            styles = dict mapping token names to CSS overriding the
                     built-in style of that token

        Raises SyntaxError when the mode is not one of the above.
        """
        styles = styles or {}
        mode = mode.upper()
        if link and link[-1] != '/':
            link = link + '/'
        self.link = link
        self.styles = styles
        self.output = []
        self.span_style = None
        # Opening delimiter of the triple-quoted string being scanned.
        self.strMultilineString = ''
        try:
            (start_mode, suppressed) = self._MODES[mode]
        except KeyError:
            raise SyntaxError('Unknown mode: %s' % mode)
        self.suppress_tokens = list(suppressed)
        self.mode = start_mode

    def c_tokenizer(
        self,
        token,
        match,
        style,
    ):
        """
        Callback for C specific highlighting.

        Emits the escaped match in the given style; never switches mode.
        """
        value = _escape(match.group())
        self.change_style(token, style)
        self.output.append(value)

    def python_tokenizer(
        self,
        token,
        match,
        style,
    ):
        """
        Callback for python specific highlighting.

        Returns the name of the mode to switch to, or None to stay in the
        current mode.
        """
        value = _escape(match.group())
        if token == 'MULTILINESTRING':
            # Remember which delimiter opened the string so that only the
            # same delimiter can close it.
            self.change_style(token, style)
            self.output.append(value)
            self.strMultilineString = match.group(1)
            return 'PYTHONMultilineString'
        elif token == 'ENDMULTILINESTRING':
            if match.group(1) == self.strMultilineString:
                # Closing delimiter: emit in the style already open.
                self.output.append(value)
                self.strMultilineString = ''
                return 'PYTHON'
            # Otherwise this is the other kind of triple quote inside the
            # string; fall through and emit it as ordinary text.
        if style and style[:5] == 'link:':
            # Style of the form 'link:<url>;<css>': emit a hyperlink (or a
            # plain span when no link prefix is configured).
            self.change_style(None, None)
            (url, style) = style[5:].split(';', 1)
            if url == 'None' or url == '':
                self.output.append('<span style="%s">%s</span>'
                                   % (style, value))
            else:
                self.output.append('<a href="%s%s" style="%s">%s</a>'
                                   % (url, value, style, value))
        else:
            self.change_style(token, style)
            self.output.append(value)
        if token == 'GOTOHTML':
            return 'HTML'
        return None

    def html_tokenizer(
        self,
        token,
        match,
        style,
    ):
        """
        Callback for HTML specific highlighting.

        Returns 'PYTHON' after a GOTOPYTHON token ('{{'), None otherwise.
        """
        value = _escape(match.group())
        self.change_style(token, style)
        self.output.append(value)
        if token == 'GOTOPYTHON':
            return 'PYTHON'
        return None

    # mode name -> (tokenizer callback, ((token, regex, style), ...)).
    # Rules are tried in order, so earlier entries take precedence.
    # A style of 'Keep' leaves the currently open span untouched and a
    # style of None means "no span".
    all_styles = {
        'C': (c_tokenizer, (
            ('COMMENT', re.compile(r'//.*\r?\n'),
             'color: green; font-style: italic'),
            ('MULTILINECOMMENT', re.compile(r'/\*.*?\*/', re.DOTALL),
             'color: green; font-style: italic'),
            ('PREPROCESSOR', re.compile(r'\s*#.*?[^\\]\s*\n',
                                        re.DOTALL),
             'color: magenta; font-style: italic'),
            ('PUNC', re.compile(r'[-+*!&|^~/%\=<>\[\]{}(),.:]'),
             'font-weight: bold'),
            ('NUMBER',
             re.compile(r'0x[0-9a-fA-F]+|[+-]?\d+(\.\d+)?([eE][+-]\d+)?|\d+'),
             'color: red'),
            ('KEYWORD', re.compile(r'(sizeof|int|long|short|char|void|'
                                   + r'signed|unsigned|float|double|'
                                   + r'goto|break|return|continue|asm|'
                                   + r'case|default|if|else|switch|while|for|do|'
                                   + r'struct|union|enum|typedef|'
                                   + r'static|register|auto|volatile|extern|const)(?![a-zA-Z0-9_])'),
             'color:#185369; font-weight: bold'),
            ('CPPKEYWORD',
             re.compile(r'(class|private|protected|public|template|new|delete|'
                        + r'this|friend|using|inline|export|bool|throw|try|catch|'
                        + r'operator|typeid|virtual)(?![a-zA-Z0-9_])'),
             'color: blue; font-weight: bold'),
            ('STRING', re.compile(r'r?u?\'(.*?)(?<!\\)\'|"(.*?)(?<!\\)"'),
             'color: #FF9966'),
            ('IDENTIFIER', re.compile(r'[a-zA-Z_][a-zA-Z0-9_]*'),
             None),
            ('WHITESPACE', re.compile(r'[ \r\n]+'), 'Keep'),
        )),
        'PYTHON': (python_tokenizer, (
            ('GOTOHTML', re.compile(r'\}\}'), 'color: red'),
            ('PUNC', re.compile(r'[-+*!|&^~/%\=<>\[\]{}(),.:]'),
             'font-weight: bold'),
            ('NUMBER',
             re.compile(r'0x[0-9a-fA-F]+|[+-]?\d+(\.\d+)?([eE][+-]\d+)?|\d+'),
             'color: red'),
            ('KEYWORD',
             re.compile(r'(def|class|break|continue|del|exec|finally|pass|'
                        + r'print|raise|return|try|except|global|assert|lambda|'
                        + r'yield|for|while|if|elif|else|and|in|is|not|or|import|'
                        + r'from|True|False)(?![a-zA-Z0-9_])'),
             'color:#185369; font-weight: bold'),
            ('WEB2PY',
             re.compile(r'(request|response|session|cache|redirect|local_import|'
                        + r'HTTP|TR|XML|URL|BEAUTIFY|A|BODY|BR|B|CAT|CENTER|CODE|'
                        + r'COL|COLGROUP|DIV|EM|EMBED|FIELDSET|LEGEND|FORM|'
                        + r'H1|H2|H3|H4|H5|H6|IFRAME|HEAD|HR|HTML|I|IMG|INPUT|'
                        + r'LABEL|LI|LINK|MARKMIN|MENU|META|OBJECT|OL|ON|OPTION|'
                        + r'P|PRE|SCRIPT|SELECT|SPAN|STYLE|TABLE|THEAD|TBODY|'
                        + r'TFOOT|TAG|TD|TEXTAREA|TH|TITLE|TT|T|UL|XHTML|'
                        + r'IS_SLUG|IS_STRONG|IS_LOWER|IS_UPPER|IS_ALPHANUMERIC|'
                        + r'IS_DATETIME|IS_DATETIME_IN_RANGE|IS_DATE|'
                        + r'IS_DATE_IN_RANGE|IS_DECIMAL_IN_RANGE|IS_EMAIL|'
                        + r'IS_EXPR|IS_FLOAT_IN_RANGE|IS_IMAGE|IS_INT_IN_RANGE|'
                        + r'IS_IN_SET|IS_IPV4|IS_LIST_OF|IS_LENGTH|IS_MATCH|'
                        + r'IS_EQUAL_TO|IS_EMPTY_OR|IS_NULL_OR|IS_NOT_EMPTY|'
                        + r'IS_TIME|IS_UPLOAD_FILENAME|IS_URL|CLEANUP|CRYPT|'
                        + r'IS_IN_DB|IS_NOT_IN_DB|DAL|Field|SQLFORM|SQLTABLE|'
                        + r'xmlescape|embed64)(?![a-zA-Z0-9_])'),
             'link:%(link)s;text-decoration:None;color:#FF5C1F;'),
            # The lookahead stops identifiers such as 'selfish' or
            # 'None_' from being split into a highlighted prefix + rest.
            ('MAGIC', re.compile(r'(?:self|None)(?![a-zA-Z0-9_])'),
             'color:#185369; font-weight: bold'),
            ('MULTILINESTRING', re.compile(r'r?u?(\'\'\'|""")'),
             'color: #FF9966'),
            ('STRING', re.compile(r'r?u?\'(.*?)(?<!\\)\'|"(.*?)(?<!\\)"'),
             'color: #FF9966'),
            ('IDENTIFIER', re.compile(r'[a-zA-Z_][a-zA-Z0-9_]*'),
             None),
            ('COMMENT', re.compile(r'\#.*\r?\n'),
             'color: green; font-style: italic'),
            ('WHITESPACE', re.compile(r'[ \r\n]+'), 'Keep'),
        )),
        'PYTHONMultilineString': (python_tokenizer,
                                  (('ENDMULTILINESTRING',
                                    re.compile(r'.*?("""|\'\'\')',
                                               re.DOTALL),
                                    'color: darkred'), )),
        'HTML': (html_tokenizer, (
            ('GOTOPYTHON', re.compile(r'\{\{'), 'color: red'),
            ('COMMENT', re.compile(r'<!--[^>]*-->|<!>'),
             'color: green; font-style: italic'),
            ('XMLCRAP', re.compile(r'<![^>]*>'),
             'color: blue; font-style: italic'),
            ('SCRIPT', re.compile(r'<script .*?</script>', re.IGNORECASE
                                  + re.DOTALL), 'color: black'),
            ('TAG', re.compile(r'</?\s*[a-zA-Z0-9]+'),
             'color: darkred; font-weight: bold'),
            ('ENDTAG', re.compile(r'/?>'),
             'color: darkred; font-weight: bold'),
        )),
    }

    def highlight(self, data):
        """
        Syntax highlight some code in the mode chosen at construction.
        Returns html version of code.

        Every call starts from a clean state, so one instance can be used
        to highlight several independent snippets.
        """
        # Reset per-call state; without this a second call would return
        # the previous call's markup followed by the new one.
        self.output = []
        self.span_style = None
        self.strMultilineString = ''

        pos = 0
        mode = self.mode
        while pos < len(data):
            (tokenizer, rules) = Highlighter.all_styles[mode]
            for (token, pattern, style) in rules:
                if token in self.suppress_tokens:
                    continue
                match = pattern.match(data, pos)
                if not match:
                    continue
                if style:
                    # Only the WEB2PY style contains a %(link)s field.
                    style = style % dict(link=self.link)
                new_mode = tokenizer(self, token, match, style)
                if new_mode is not None:
                    mode = new_mode
                # Always advance, even on a zero-width match.
                pos += max(1, len(match.group()))
                break
            else:
                # No rule matched: emit the character unstyled, but still
                # escaped so that e.g. a bare '&' cannot corrupt the HTML.
                self.change_style(None, None)
                self.output.append(_escape(data[pos]))
                pos += 1
        self.change_style(None, None)
        # NOTE(review): tabs are expanded on the finished markup, so tab
        # stops are computed against markup columns, not source columns.
        return ''.join(self.output).expandtabs(4)

    def change_style(self, token, style):
        """
        Generate output to change from existing style to another style only.

        A per-token entry in ``self.styles`` overrides ``style``.  The
        special style 'Keep' leaves the currently open span untouched.
        """
        if token in self.styles:
            style = self.styles[token]
        if self.span_style == style or style == 'Keep':
            return
        if self.span_style is not None:
            self.output.append('</span>')
        if style is not None:
            self.output.append('<span style="%s">' % style)
        self.span_style = style
248 249
def highlight(
    code,
    language,
    link='/examples/globals/vars/',
    counter=1,
    styles=None,
    highlight_line=None,
    context_lines=None,
    attributes=None,
):
    """
    Render ``code`` as an HTML table with a line-number column and a code
    column.

    code = source text to render
    language = PYTHON, C, CPP, HTML or WEB2PY (case-insensitive) to get
               syntax highlighting; anything else (including None) only
               HTML-escapes the text
    link = URL prefix handed to the Highlighter
    counter = number shown for the first line; None for an empty number
              column; a string to repeat that (escaped) string on every
              line
    styles = dict of CSS overrides; the keys CODE, LINENUMBERS and
             LINEHIGHLIGHT are used here, the whole dict is also handed to
             the Highlighter
    highlight_line = line to emphasise; given in the displayed numbering
                     when counter is a non-zero number, otherwise used as
                     a 0-based index.  Ignored when falsy or out of range.
    context_lines = when a line is highlighted, keep only this many lines
                    before and after it
    attributes = dict of attributes for the <table> tag; only keys
                 starting with '_' are used, a None value gives a bare
                 attribute, other falsy values are dropped

    Returns the HTML as a string.
    """
    # cgi.escape was removed in Python 3.8; html.escape is the replacement.
    # quote=False keeps the historic behaviour of leaving quotes alone.
    try:
        from html import escape
    except ImportError:  # Python 2
        from cgi import escape

    styles = styles or {}
    attributes = attributes or {}
    if 'CODE' not in styles:
        code_style = """
        font-size: 11px;
        font-family: Bitstream Vera Sans Mono,monospace;
        background-color: transparent;
        margin: 0;
        padding: 5px;
        border: none;
        overflow: auto;
        white-space: pre !important;\n"""
    else:
        code_style = styles['CODE']
    if 'LINENUMBERS' not in styles:
        linenumbers_style = """
        font-size: 11px;
        font-family: Bitstream Vera Sans Mono,monospace;
        background-color: transparent;
        margin: 0;
        padding: 5px;
        border: none;
        color: #A0A0A0;\n"""
    else:
        linenumbers_style = styles['LINENUMBERS']
    if 'LINEHIGHLIGHT' not in styles:
        linehighlight_style = "background-color: #EBDDE2;"
    else:
        linehighlight_style = styles['LINEHIGHLIGHT']

    if language and language.upper() in ['PYTHON', 'C', 'CPP', 'HTML',
                                         'WEB2PY']:
        code = Highlighter(language, link, styles).highlight(code)
    else:
        code = escape(code, quote=False)
    lines = code.split('\n')

    if counter is None:
        linenumbers = [''] * len(lines)
    elif isinstance(counter, str):
        linenumbers = [escape(counter, quote=False)] * len(lines)
    else:
        linenumbers = [str(i + counter) + '.' for i in range(len(lines))]

    if highlight_line:
        if counter and not isinstance(counter, str):
            lineno = highlight_line - counter
        else:
            lineno = highlight_line
        # Reject negative indexes too: they would silently address lines
        # counted from the end of the text.
        if 0 <= lineno < len(lines):
            lines[lineno] = '<div style="%s">%s</div>' % (
                linehighlight_style, lines[lineno])
            linenumbers[lineno] = '<div style="%s">%s</div>' % (
                linehighlight_style, linenumbers[lineno])

            if context_lines and context_lines > 0:
                # Symmetric window: context_lines before and after the
                # highlighted line (the end bound is exclusive, hence +1).
                first = max(0, lineno - context_lines)
                last = lineno + context_lines + 1
                lines = lines[first:last]
                linenumbers = linenumbers[first:last]

    code = '<br/>'.join(lines)
    numbers = '<br/>'.join(linenumbers)

    items = list(attributes.items())
    bare = [key[1:].lower() for (key, value) in items
            if key[:1] == '_' and value is None]
    # Double quotes are swapped for single ones so a value cannot close
    # the attribute early.
    valued = ['%s="%s"' % (key[1:].lower(), str(value).replace('"', "'"))
              for (key, value) in items if key[:1] == '_' and value]
    fa = ' '.join(bare + valued)
    if fa:
        fa = ' ' + fa
    return '<table%s><tr style="vertical-align:top;"><td style="min-width:40px; text-align: right;"><pre style="%s">%s</pre></td><td><pre style="%s">%s</pre></td></tr></table>'\
        % (fa, linenumbers_style, numbers, code_style, code)


if __name__ == '__main__':
    # Command-line use: highlight FILE as LANGUAGE and write a minimal
    # HTML page to standard output.
    import sys

    if len(sys.argv) < 3:
        sys.stderr.write('usage: %s FILE LANGUAGE\n' % sys.argv[0])
        sys.exit(2)
    argfp = open(sys.argv[1])
    try:
        data = argfp.read()
    finally:
        # Close the file even when reading fails.
        argfp.close()
    # sys.stdout.write works on Python 2 and 3 alike, unlike the print
    # statement; the trailing newline matches what print emitted.
    sys.stdout.write('<html><body>' + highlight(data, sys.argv[2])
                     + '</body></html>\n')