gluon.sanitizer

33

def __init__(
    self,
    permitted_tags=[
        'a',
        'b',
        'blockquote',
        'br/',
        'i',
        'li',
        'ol',
        'ul',
        'p',
        'cite',
        'code',
        'pre',
        'img/',
    ],
    allowed_attributes={'a': ['href', 'title'], 'img': ['src', 'alt'],
                        'blockquote': ['type']},
    fmt=AbstractFormatter,
    strip_disallowed=False
):
    """
    Set up the tag/attribute whitelist and the per-parse state.

    @type permitted_tags: list of str
    @param permitted_tags: names of the tags that are passed through; a
        trailing '/' (as in 'br/') marks a tag that takes no closing tag
    @type allowed_attributes: dict
    @param allowed_attributes: maps a tag name to the list of attribute
        names that are kept on that tag
    @param fmt: handed unchanged to HTMLParser.__init__
    @type strip_disallowed: boolean
    @param strip_disallowed: if True, disallowed tags are dropped;
        otherwise they are escaped
    """

    HTMLParser.__init__(self, fmt)
    self.result = ''
    self.open_tags = []

    # endswith() rather than i[-1]: an empty tag name must not raise
    # IndexError.
    self.permitted_tags = [i for i in permitted_tags
                           if not i.endswith('/')]
    self.requires_no_close = [i[:-1] for i in permitted_tags
                              if i.endswith('/')]
    self.permitted_tags += self.requires_no_close

    # Copy the mapping (and its lists): the default argument is a single
    # dict object shared by every call, so storing it directly would let
    # a change made through one instance leak into all the others.
    self.allowed_attributes = dict(
        (tag, list(names)) for (tag, names) in allowed_attributes.items())

    # The only schemes allowed in URLs (for href and src attributes).
    # Adding "javascript" or "vbscript" to this list would not be smart.

    self.allowed_schemes = ['http', 'https', 'ftp', 'mailto']

    # to strip or escape disallowed tags?
    self.strip_disallowed = strip_disallowed
    self.in_disallowed = False

74

def handle_data(self, data):
    """
    Append escaped text content to the result.

    Nothing is appended when the data is empty or when the parser is
    currently inside a disallowed tag.
    """
    if self.in_disallowed or not data:
        return
    self.result += xssescape(data)

78

def handle_charref(self, ref):
    """
    Append a numeric character reference to the result.

    A reference made only of digits and shorter than 7 characters is
    re-emitted as '&#NNN;'. Anything else is escaped so that it shows up
    as literal text. Nothing is appended inside a disallowed tag.
    """
    if self.in_disallowed:
        return
    short_decimal = len(ref) < 7 and ref.isdigit()
    if short_decimal:
        piece = '&#%s;' % ref
    else:
        piece = xssescape('&#%s' % ref)
    self.result += piece

86

def handle_entityref(self, ref):
    """
    Append a named entity reference to the result.

    A name found in entitydefs is re-emitted as '&name;'. An unknown name
    is escaped so that it shows up as literal text. Nothing is appended
    inside a disallowed tag.
    """
    if self.in_disallowed:
        return
    if ref in entitydefs:
        piece = '&%s;' % ref
    else:
        piece = xssescape('&%s' % ref)
    self.result += piece

94

def handle_comment(self, comment):
    """
    Append an HTML comment to the result in escaped form.

    The comment text is wrapped in its '<!--' / '-->' delimiters again
    and the whole string is escaped, so the comment appears as visible
    text instead of markup. Empty comments, and comments met inside a
    disallowed tag, are ignored.
    """
    if self.in_disallowed or not comment:
        return
    # The format string needs a %s placeholder: an empty format string
    # raises "TypeError: not all arguments converted" for any non-empty
    # comment.
    self.result += xssescape('<!--%s-->' % comment)

100

def handle_starttag(
    self,
    tag,
    method,
    attrs,
):
    """
    Emit a start tag, keeping only whitelisted attributes.

    A tag that is not permitted is either escaped as text or, when
    strip_disallowed is set, marks the parser as being inside a
    disallowed tag. A permitted tag is rebuilt from scratch with the
    attributes listed for it in allowed_attributes that are present and
    non-empty; URL attributes are additionally checked with
    url_is_acceptable. An 'a' or 'img' tag left with no attribute at all
    is dropped.

    @param tag: tag name
    @param method: not used by this method
    @param attrs: sequence of (name, value) pairs
    """
    if tag not in self.permitted_tags:
        if self.strip_disallowed:
            self.in_disallowed = True
        else:
            self.result += xssescape('<%s>' % tag)
        return

    bt = '<' + tag
    if tag in self.allowed_attributes:
        attrs = dict(attrs)
        self.allowed_attributes_here = [
            x for x in self.allowed_attributes[tag]
            if x in attrs and len(attrs[x]) > 0]
        for attribute in self.allowed_attributes_here:
            value = attrs[attribute]
            if attribute in ['href', 'src', 'background']:
                if self.url_is_acceptable(value):
                    # quoteattr() instead of a bare "%s" between double
                    # quotes: a URL with an allowed scheme may still
                    # contain '"', which would otherwise end the
                    # attribute and let the input inject new attributes.
                    # NOTE(review): assumes the base parser hands over
                    # attribute values with entities already decoded,
                    # so escaping '&' here does not double-escape --
                    # confirm.
                    bt += ' %s=%s' % (attribute, quoteattr(value))
            else:
                bt += ' %s=%s' % (xssescape(attribute),
                                  quoteattr(value))

    # an anchor or image that kept no attribute is dropped
    if bt == '<a' or bt == '<img':
        return
    if tag in self.requires_no_close:
        bt += ' /'
    bt += '>'
    self.result += bt
    # most recently opened tag first
    self.open_tags.insert(0, tag)

134

def handle_endtag(self, tag, attrs):
    """
    Emit a closing tag.

    A permitted tag is closed only if it is recorded in open_tags; it is
    then removed from that list. A tag that is not permitted is escaped
    as text or, when strip_disallowed is set, ends the "inside a
    disallowed tag" state.

    @param tag: tag name
    @param attrs: not used by this method
    """
    closing = '</%s>' % tag
    if tag in self.permitted_tags:
        if tag in self.open_tags:
            self.result += closing
            self.open_tags.remove(tag)
        return
    if self.strip_disallowed:
        self.in_disallowed = False
    else:
        self.result += xssescape(closing)

145

def unknown_starttag(self, tag, attributes):
    """
    Forward the tag and its attributes to handle_starttag, passing None
    as the method argument.
    """
    no_method = None
    self.handle_starttag(tag, no_method, attributes)

148

def unknown_endtag(self, tag):
    """
    Forward the tag to handle_endtag, passing None as the attrs argument.
    """
    no_attrs = None
    self.handle_endtag(tag, no_attrs)

151

def url_is_acceptable(self, url):
    """
    Tell whether a URL may be kept in an href/src style attribute.

    A URL is accepted when its scheme is one of allowed_schemes and
    either its network location contains a '.' or its path contains an
    '@' (the mailto case), or when it has no scheme and its path starts
    with '/'.
    """

    scheme, netloc, path = urlparse(url)[:3]
    scheme_allowed = scheme in self.allowed_schemes
    if scheme_allowed and ('.' in netloc or '@' in path):
        return True
    return scheme == '' and path.startswith('/')

161

def strip(self, rawstring, escape=True):
    """
    Returns the argument stripped of potentially harmful
    HTML or Javascript code

    Anything that is not a str is returned as str(rawstring) without
    being parsed. Tags still open at the end of the input are closed in
    the returned string.

    @type escape: boolean
    @param escape: If True (default) it escapes the potentially harmful
        content, otherwise remove it
    """

    if not isinstance(rawstring, str):
        return str(rawstring)
    # rewrite '<br/>' as '<br />' for every tag that takes no closing tag
    for tag in self.requires_no_close:
        rawstring = rawstring.replace("<%s/>" % tag, "<%s />" % tag)
    # NOTE(review): this is never switched back, so one escape=False call
    # leaves the instance in stripping mode for later calls.
    if not escape:
        self.strip_disallowed = True

    # Start every call from a clean slate. Resetting only `result` let
    # tags left open by a previous input be closed again in this output,
    # and a previous input that ended inside a disallowed tag made this
    # input's text vanish.
    self.result = ''
    self.open_tags = []
    self.in_disallowed = False

    self.feed(rawstring)

    # close whatever is still open, in the order recorded in open_tags
    self.result += ''.join(
        '</%s>' % endtag for endtag in self.open_tags
        if endtag not in self.requires_no_close)
    return self.result

184

def xtags(self):
    """
    Build an escaped, human-readable summary of the permitted tags.

    Tags are listed in sorted order, separated by single spaces; each tag
    that has an entry in allowed_attributes shows those attributes with
    empty values.
    """

    shown = []
    for name in sorted(self.permitted_tags):
        attr_text = ''
        if name in self.allowed_attributes:
            attr_text = ''.join(
                [' %s=""' % attr for attr in self.allowed_attributes[name]])
        shown.append('<' + name + attr_text + '>')
    return xssescape(' '.join(shown))

Source Code for Module gluon.sanitizer