simonw · February 12, 2010 12:04
diff --git a/escape_unwhitelisted_tags.py b/escape_unwhitelisted_tags.py
 import re
 # Tokeniser for the whitelisted tags: <b>, <i>, <blockquote>, their closing
 # tags, </a>, and an <a> whose only attribute is a non-empty double-quoted
 # href. The whole alternation is one capturing group, so p.split() keeps
 # the matched tags in its result; matching is case-insensitive.
 p = re.compile(
    r'(<b>|</b>|<i>|</i>|<blockquote>|</blockquote>|<a href="[^"]+">|</a>)',
    flags=re.IGNORECASE
 )

 def escape(s):
    """
    Return s with the characters &, > and < replaced by the HTML entities
    &amp;, &gt; and &lt;. Quote characters are left untouched.
    """
    # '&' has to be handled first: otherwise the ampersands introduced by
    # the other two entities would themselves be escaped a second time.
    return s.replace('&', '&amp;').replace('>', '&gt;').replace('<', '&lt;')

 def escape_unwhitelisted_tags(html):
    """
    Return html with every piece that is not a whitelisted tag (as matched
    by the module-level pattern p) passed through escape(); whitelisted
    tags are copied through unchanged.

    WARNING: this is NOT a complete solution for sanitising potentially
    malicious HTML. The caller still has to make sure the resulting tags
    are correctly balanced, and it is VITAL to add a further step that
    rejects dodgy protocols such as javascript: in the href of a tags.
    """
    # p.split() yields the text between tags as well as the tags themselves
    # (the pattern is a capturing group), so each piece is either kept
    # verbatim or escaped depending on whether it matches the whitelist.
    return ''.join(
        piece if p.match(piece) else escape(piece)
        for piece in p.split(html)
    )
	import re
	# Tokeniser for the whitelisted tags: <b>, <i>, <blockquote>, their closing
	# tags, </a>, and an <a> whose only attribute is a non-empty double-quoted
	# href. The alternatives must be separated by a bare | (a backslash-escaped
	# \| would match a literal pipe character instead). The single capturing
	# group makes p.split() keep the matched tags; matching is case-insensitive.
	p = re.compile(
	    r'(<b>|</b>|<i>|</i>|<blockquote>|</blockquote>|<a href="[^"]+">|</a>)',
	    re.IGNORECASE
	)

	def escape(s):
	    """
	    Return s with the characters &, > and < replaced by the HTML entities
	    &amp;, &gt; and &lt;. Quote characters are left untouched.
	    """
	    # '&' has to be handled first: otherwise the ampersands introduced by
	    # the other two entities would themselves be escaped a second time.
	    return s.replace('&', '&amp;').replace('>', '&gt;').replace('<', '&lt;')

	def escape_unwhitelisted_tags(html):
	    """
	    Return html with every piece that is not a whitelisted tag (as matched
	    by the module-level pattern p) passed through escape(); whitelisted
	    tags are copied through unchanged.

	    This is NOT a complete solution for sanitising potentially malicious
	    HTML. You still need to ensure that the resulting tags are correctly
	    balanced, and it's VITAL that you add an additional step to check that
	    users have not used dodgy protocols like javascript: in their a tags.
	    """
	    s = []
	    # p.split() returns the text between tags as well as the tags
	    # themselves, because the pattern is a single capturing group.
	    for token in p.split(html):
	        if p.match(token):
	            # Whitelisted tag: keep it verbatim.
	            s.append(token)
	        else:
	            # Anything else is treated as text and escaped.
	            s.append(escape(token))
	    return ''.join(s)