1
2
3
4 """
5 | This file is part of the web2py Web Framework
6 | Copyrighted by Massimo Di Pierro <mdipierro@cs.depaul.edu>
7 | License: LGPLv3 (http://www.gnu.org/licenses/lgpl.html)
8 """
9
10 import re
11 import cgi
12
13 __all__ = ['highlight']
14
15
try:  # Python 3.2+ (cgi.escape was removed in Python 3.8)
    from html import escape as _html_escape
except ImportError:  # Python 2
    from cgi import escape as _html_escape


class Highlighter(object):
    """Does syntax highlighting.

    A regex-driven tokenizer that turns source text into HTML where runs of
    tokens sharing a style are wrapped in ``<span style="...">`` elements.
    The rule tables live in ``all_styles``; each mode has a callback that
    emits the token and may return the name of the mode to switch to.

    Output accumulates in ``self.output`` and is only initialised in
    ``__init__``, so use a fresh instance for every ``highlight()`` call.
    """

    def __init__(
        self,
        mode,
        link=None,
        styles=None,
    ):
        """
        Initialize highlighter:
            mode = language (PYTHON, WEB2PY, C, CPP, HTML, HTML_PLAIN)
            link = base URL prepended to WEB2PY names to build hyperlinks;
                   a trailing '/' is added when missing
            styles = dict mapping token names to CSS strings that override
                     the defaults from ``all_styles``

        Raises SyntaxError when the mode is not recognised.
        """
        styles = styles or {}
        mode = mode.upper()
        if link and link[-1] != '/':
            link = link + '/'
        self.link = link
        self.styles = styles
        self.output = []
        self.span_style = None
        # Quote sequence that opened the current multi-line string, if any.
        self.strMultilineString = ''
        # Some public modes are aliases of a base rule table; they differ
        # only in which tokens are suppressed (never tried) while scanning.
        if mode == 'WEB2PY':
            (mode, self.suppress_tokens) = ('PYTHON', [])
        elif mode == 'PYTHON':
            self.suppress_tokens = ['GOTOHTML']
        elif mode == 'CPP':
            (mode, self.suppress_tokens) = ('C', [])
        elif mode == 'C':
            self.suppress_tokens = ['CPPKEYWORD']
        elif mode == 'HTML_PLAIN':
            (mode, self.suppress_tokens) = ('HTML', ['GOTOPYTHON'])
        elif mode == 'HTML':
            self.suppress_tokens = []
        else:
            raise SyntaxError('Unknown mode: %s' % mode)
        self.mode = mode

    def c_tokenizer(
        self,
        token,
        match,
        style,
    ):
        """
        Callback for C specific highlighting.
        """
        # NOTE(review): the original body of this method was missing from
        # the reviewed text; it is reconstructed here by analogy with
        # html_tokenizer (escape, switch style, emit; never changes mode).
        # Confirm against the upstream file.
        value = _html_escape(match.group(), quote=False)
        self.change_style(token, style)
        self.output.append(value)
        return None

    def python_tokenizer(
        self,
        token,
        match,
        style,
    ):
        """
        Callback for python specific highlighting.

        Returns the name of the mode to switch to, or None to stay in the
        current mode.
        """
        value = _html_escape(match.group(), quote=False)
        if token == 'MULTILINESTRING':
            self.change_style(token, style)
            self.output.append(value)
            # Remember which quote opened the string so only the same
            # quote sequence can close it.
            self.strMultilineString = match.group(1)
            return 'PYTHONMultilineString'
        elif token == 'ENDMULTILINESTRING':
            if match.group(1) == self.strMultilineString:
                self.output.append(value)
                self.strMultilineString = ''
                return 'PYTHON'
            # Different quote sequence: fall through, emit the text and
            # stay inside the multi-line string.
        if style and style[:5] == 'link:':
            # Style has the form 'link:<url>;<css>'.
            self.change_style(None, None)
            (url, style) = style[5:].split(';', 1)
            if url == 'None' or url == '':
                # No base link configured ('%(link)s' rendered None).
                self.output.append('<span style="%s">%s</span>'
                                   % (style, value))
            else:
                self.output.append('<a href="%s%s" style="%s">%s</a>'
                                   % (url, value, style, value))
        else:
            self.change_style(token, style)
            self.output.append(value)
        if token == 'GOTOHTML':
            return 'HTML'
        return None

    def html_tokenizer(
        self,
        token,
        match,
        style,
    ):
        """
        Callback for HTML specific highlighting.

        Returns 'PYTHON' after emitting a GOTOPYTHON token, otherwise None.
        """
        value = _html_escape(match.group(), quote=False)
        self.change_style(token, style)
        self.output.append(value)
        if token == 'GOTOPYTHON':
            return 'PYTHON'
        return None

    # mode -> (callback, ((token name, compiled regex, default style), ...)).
    # Rules are tried in order at the current position; the first match wins.
    # The style 'Keep' means "leave the currently open span untouched".
    all_styles = {
        'C': (c_tokenizer, (
            ('COMMENT', re.compile(r'//.*\r?\n'),
             'color: green; font-style: italic'),
            ('MULTILINECOMMENT', re.compile(r'/\*.*?\*/', re.DOTALL),
             'color: green; font-style: italic'),
            ('PREPROCESSOR', re.compile(r'\s*#.*?[^\\]\s*\n',
             re.DOTALL), 'color: magenta; font-style: italic'),
            ('PUNC', re.compile(r'[-+*!&|^~/%\=<>\[\]{}(),.:]'),
             'font-weight: bold'),
            ('NUMBER',
             re.compile(r'0x[0-9a-fA-F]+|[+-]?\d+(\.\d+)?([eE][+-]\d+)?|\d+'),
             'color: red'),
            ('KEYWORD', re.compile(r'(sizeof|int|long|short|char|void|'
             + r'signed|unsigned|float|double|'
             + r'goto|break|return|continue|asm|'
             + r'case|default|if|else|switch|while|for|do|'
             + r'struct|union|enum|typedef|'
             + r'static|register|auto|volatile|extern|const)(?![a-zA-Z0-9_])'),
             'color:#185369; font-weight: bold'),
            ('CPPKEYWORD',
             re.compile(r'(class|private|protected|public|template|new|delete|'
             + r'this|friend|using|inline|export|bool|throw|try|catch|'
             + r'operator|typeid|virtual)(?![a-zA-Z0-9_])'),
             'color: blue; font-weight: bold'),
            ('STRING', re.compile(r'r?u?\'(.*?)(?<!\\)\'|"(.*?)(?<!\\)"'),
             'color: #FF9966'),
            ('IDENTIFIER', re.compile(r'[a-zA-Z_][a-zA-Z0-9_]*'),
             None),
            ('WHITESPACE', re.compile(r'[ \r\n]+'), 'Keep'),
        )),
        'PYTHON': (python_tokenizer, (
            ('GOTOHTML', re.compile(r'\}\}'), 'color: red'),
            ('PUNC', re.compile(r'[-+*!|&^~/%\=<>\[\]{}(),.:]'),
             'font-weight: bold'),
            ('NUMBER',
             re.compile(r'0x[0-9a-fA-F]+|[+-]?\d+(\.\d+)?([eE][+-]\d+)?|\d+'
                        ), 'color: red'),
            ('KEYWORD',
             re.compile(r'(def|class|break|continue|del|exec|finally|pass|'
             + r'print|raise|return|try|except|global|assert|lambda|'
             + r'yield|for|while|if|elif|else|and|in|is|not|or|import|'
             + r'from|True|False)(?![a-zA-Z0-9_])'),
             'color:#185369; font-weight: bold'),
            ('WEB2PY',
             re.compile(r'(request|response|session|cache|redirect|local_import|HTTP|TR|XML|URL|BEAUTIFY|A|BODY|BR|B|CAT|CENTER|CODE|COL|COLGROUP|DIV|EM|EMBED|FIELDSET|LEGEND|FORM|H1|H2|H3|H4|H5|H6|IFRAME|HEAD|HR|HTML|I|IMG|INPUT|LABEL|LI|LINK|MARKMIN|MENU|META|OBJECT|OL|ON|OPTION|P|PRE|SCRIPT|SELECT|SPAN|STYLE|TABLE|THEAD|TBODY|TFOOT|TAG|TD|TEXTAREA|TH|TITLE|TT|T|UL|XHTML|IS_SLUG|IS_STRONG|IS_LOWER|IS_UPPER|IS_ALPHANUMERIC|IS_DATETIME|IS_DATETIME_IN_RANGE|IS_DATE|IS_DATE_IN_RANGE|IS_DECIMAL_IN_RANGE|IS_EMAIL|IS_EXPR|IS_FLOAT_IN_RANGE|IS_IMAGE|IS_INT_IN_RANGE|IS_IN_SET|IS_IPV4|IS_LIST_OF|IS_LENGTH|IS_MATCH|IS_EQUAL_TO|IS_EMPTY_OR|IS_NULL_OR|IS_NOT_EMPTY|IS_TIME|IS_UPLOAD_FILENAME|IS_URL|CLEANUP|CRYPT|IS_IN_DB|IS_NOT_IN_DB|DAL|Field|SQLFORM|SQLTABLE|xmlescape|embed64)(?![a-zA-Z0-9_])'
                        ), 'link:%(link)s;text-decoration:None;color:#FF5C1F;'),
            # The lookahead keeps identifiers that merely start with
            # 'self'/'None' (e.g. 'selfish') from being split in two.
            ('MAGIC', re.compile(r'(self|None)(?![a-zA-Z0-9_])'),
             'color:#185369; font-weight: bold'),
            ('MULTILINESTRING', re.compile(r'r?u?(\'\'\'|""")'),
             'color: #FF9966'),
            ('STRING', re.compile(r'r?u?\'(.*?)(?<!\\)\'|"(.*?)(?<!\\)"'
                                  ), 'color: #FF9966'),
            ('IDENTIFIER', re.compile(r'[a-zA-Z_][a-zA-Z0-9_]*'),
             None),
            ('COMMENT', re.compile(r'\#.*\r?\n'),
             'color: green; font-style: italic'),
            ('WHITESPACE', re.compile(r'[ \r\n]+'), 'Keep'),
        )),
        'PYTHONMultilineString': (python_tokenizer,
                                  (('ENDMULTILINESTRING',
                                    re.compile(r'.*?("""|\'\'\')',
                                               re.DOTALL), 'color: darkred'), )),
        'HTML': (html_tokenizer, (
            ('GOTOPYTHON', re.compile(r'\{\{'), 'color: red'),
            ('COMMENT', re.compile(r'<!--[^>]*-->|<!>'),
             'color: green; font-style: italic'),
            ('XMLCRAP', re.compile(r'<![^>]*>'),
             'color: blue; font-style: italic'),
            ('SCRIPT', re.compile(r'<script .*?</script>', re.IGNORECASE
                                  + re.DOTALL), 'color: black'),
            ('TAG', re.compile(r'</?\s*[a-zA-Z0-9]+'),
             'color: darkred; font-weight: bold'),
            ('ENDTAG', re.compile(r'/?>'),
             'color: darkred; font-weight: bold'),
        )),
    }

    def highlight(self, data):
        """
        Syntax highlight some code in the mode chosen at construction.
        Returns html version of code (tabs expanded to 4 columns).
        """
        i = 0
        mode = self.mode
        while i < len(data):
            (tokenizer, rules) = Highlighter.all_styles[mode]
            for (token, o_re, style) in rules:
                if token in self.suppress_tokens:
                    continue
                match = o_re.match(data, i)
                if not match:
                    continue
                if style:
                    # Fills the '%(link)s' placeholder of link styles;
                    # styles without a placeholder come back unchanged.
                    style = style % dict(link=self.link)
                new_mode = tokenizer(self, token, match, style)
                if new_mode is not None:
                    mode = new_mode
                # Always advance, even on a zero-length match.
                i += max(1, len(match.group()))
                break
            else:
                # No rule matched here: emit the single character
                # unstyled. It is escaped like every token so that a stray
                # '<' or '&' cannot inject markup into the output.
                self.change_style(None, None)
                self.output.append(_html_escape(data[i], quote=False))
                i += 1
        self.change_style(None, None)
        return ''.join(self.output).expandtabs(4)

    def change_style(self, token, style):
        """
        Generate output to change from existing style to another style only.

        A per-instance override in ``self.styles`` for ``token`` replaces
        ``style``. Nothing is emitted when the style is unchanged or is the
        special value 'Keep'.
        """
        if token in self.styles:
            style = self.styles[token]
        if self.span_style != style:
            if style != 'Keep':
                if self.span_style is not None:
                    self.output.append('</span>')
                if style is not None:
                    self.output.append('<span style="%s">' % style)
                self.span_style = style
248
249
def highlight(
    code,
    language,
    link='/examples/globals/vars/',
    counter=1,
    styles=None,
    highlight_line=None,
    context_lines=None,
    attributes=None,
):
    """
    Render ``code`` as an HTML table with a line-number column and a code
    column.

    code = source text to render
    language = PYTHON, C, CPP, HTML or WEB2PY (case-insensitive) to get
               syntax highlighting; anything else is only HTML-escaped
    link = base URL passed to the Highlighter
    counter = number shown for the first line; None shows no numbers and a
              string is repeated (escaped) on every line
    styles = dict of CSS overrides; the keys CODE, LINENUMBERS and
             LINEHIGHLIGHT are used here, the dict is also passed to the
             Highlighter
    highlight_line = line to emphasise, in the numbering given by an integer
                     ``counter`` (otherwise used as a 0-based index)
    context_lines = with ``highlight_line``, keep only this many lines
                    around the highlighted one
    attributes = dict of '_name': value pairs rendered as attributes of the
                 <table>; a value of None renders a bare attribute, other
                 falsy values are dropped, keys without '_' are ignored

    Returns the HTML string.
    """
    # cgi.escape was removed in Python 3.8; html.escape(quote=False)
    # performs the same '&', '<', '>' replacement.
    try:
        from html import escape
    except ImportError:  # Python 2
        from cgi import escape

    styles = styles or {}
    attributes = attributes or {}
    if 'CODE' not in styles:
        code_style = """
        font-size: 11px;
        font-family: Bitstream Vera Sans Mono,monospace;
        background-color: transparent;
        margin: 0;
        padding: 5px;
        border: none;
        overflow: auto;
        white-space: pre !important;\n"""
    else:
        code_style = styles['CODE']
    if 'LINENUMBERS' not in styles:
        linenumbers_style = """
        font-size: 11px;
        font-family: Bitstream Vera Sans Mono,monospace;
        background-color: transparent;
        margin: 0;
        padding: 5px;
        border: none;
        color: #A0A0A0;\n"""
    else:
        linenumbers_style = styles['LINENUMBERS']
    if 'LINEHIGHLIGHT' not in styles:
        linehighlight_style = "background-color: #EBDDE2;"
    else:
        linehighlight_style = styles['LINEHIGHLIGHT']

    if language and language.upper() in ['PYTHON', 'C', 'CPP', 'HTML',
                                         'WEB2PY']:
        code = Highlighter(language, link, styles).highlight(code)
    else:
        code = escape(code, quote=False)
    lines = code.split('\n')

    if counter is None:
        linenumbers = [''] * len(lines)
    elif isinstance(counter, str):
        linenumbers = [escape(counter, quote=False)] * len(lines)
    else:
        linenumbers = [str(offset + counter) + '.'
                       for offset in range(len(lines))]

    if highlight_line:
        if counter and not isinstance(counter, str):
            lineno = highlight_line - counter
        else:
            lineno = highlight_line
        # A negative index (highlight_line before the first numbered line)
        # would silently address lines from the end of the list, so it is
        # treated as "nothing to highlight or trim".
        if lineno >= 0:
            if lineno < len(lines):
                lines[lineno] = '<div style="%s">%s</div>' % (
                    linehighlight_style, lines[lineno])
                linenumbers[lineno] = '<div style="%s">%s</div>' % (
                    linehighlight_style, linenumbers[lineno])

            if context_lines:
                # Trim the tail first so the head indices stay valid.
                if lineno + context_lines < len(lines):
                    del lines[lineno + context_lines:]
                    del linenumbers[lineno + context_lines:]
                if lineno - context_lines > 0:
                    del lines[0:lineno - context_lines]
                    del linenumbers[0:lineno - context_lines]

    code = '<br/>'.join(lines)
    numbers = '<br/>'.join(linenumbers)

    # Only keys starting with '_' become table attributes: bare ones first
    # (value None), then name="value" pairs with '"' replaced by "'".
    items = list(attributes.items())
    bare = [key[1:].lower() for (key, value) in items
            if key[:1] == '_' and value is None]
    valued = ['%s="%s"' % (key[1:].lower(), str(value).replace('"', "'"))
              for (key, value) in items
              if key[:1] == '_' and value]
    fa = ' '.join(bare + valued)
    if fa:
        fa = ' ' + fa
    return '<table%s><tr style="vertical-align:top;"><td style="min-width:40px; text-align: right;"><pre style="%s">%s</pre></td><td><pre style="%s">%s</pre></td></tr></table>'\
        % (fa, linenumbers_style, numbers, code_style, code)
337
338
if __name__ == '__main__':
    # Command-line use: argv[1] is the file to render, argv[2] the language
    # name passed to highlight(); the HTML page is written to stdout.
    import sys
    # 'with' closes the file even if read() raises.
    with open(sys.argv[1]) as argfp:
        data = argfp.read()
    # Parenthesised single-argument form works as both the Python 2 print
    # statement and the Python 3 print function.
    print('<html><body>' + highlight(data, sys.argv[2])
          + '</body></html>')
346