November 9, 2014 03:54
diff --git a/problemo.py b/problemo.py
 #!/usr/bin/env python
 #
 # Half-assed fuzzer for HTML strip-tags functions
 #
 
 from __future__ import print_function

 # Fragments that make_test_string() picks from at random and concatenates
 # to build each fuzz input.
 BITS = [
    '<', '</', '>', '<!--', '-->', 'a', 'div', 'p', 'i',
 ]
 
 # Number of random inputs generated for TEST_STRINGS.
 how_many_test_strings = 10
 
 import random
 import re
 import markupsafe
 from django.utils.html import _strip_once, strip_tags

 # HTML page that gauntlet() writes for each failing case; the {name},
 # {challenge} and {result} placeholders are filled in through .format().
 # NOTE(review): because this is a markupsafe.Markup, .format() presumably
 # HTML-escapes plain-str arguments and inserts Markup arguments verbatim --
 # confirm against the MarkupSafe documentation.
 # NOTE(review): the text after {result} looks intended to render as a link
 # when a surviving <a> tag is present in the result -- confirm.
 test_file_template = markupsafe.Markup(
 """
 <html>
 <head><title>--test--</title>
 <body>
 <h1>{name}</h1>
 <big>
 <table width="100%">
 <tr>
 <td>{challenge}</td>
 <td>{result}am i a link?</td>
 </tr>
 </table>
 </big>
 </body>
 </html>
 """)
 
 
 def make_test_string(num_bits):
    """Return `num_bits` randomly chosen BITS fragments joined into one string.

    With `num_bits` equal to 0 the result is the empty string.
    """
    pieces = []
    for _ in range(num_bits):
        pieces.append(random.choice(BITS))
    return ''.join(pieces)
 
 
 # A stripped result that still contains a literal "<a>" counts as a failure.
 failure_re = re.compile(r'<a>')
 # Fuzz inputs shared by every contender; each is built from a random number
 # (0 to 74) of BITS fragments.
 TEST_STRINGS = [make_test_string(random.randrange(75)) for k in range(how_many_test_strings)]
 
 # Set to True by gauntlet() when any contender fails; passed to exit() at
 # the end of the script.
 any_fails = False

 def gauntlet(name, fixer):
    """Run `fixer` over every entry of TEST_STRINGS and report the failures.

    A result is a failure when failure_re finds a match in it.  Each failure
    sets the module-level any_fails flag, prints a FAIL line and writes a
    file named '<name>-<index>.html' rendered from test_file_template.
    Passing cases produce no output.

    name  -- label used in the printed line and in the report file name.
    fixer -- callable taking the challenge string and returning the
             stripped string.
    """
    global any_fails
    for num, challenge in enumerate(TEST_STRINGS):
        result = fixer(challenge)
        if not failure_re.search(result):
            continue
        any_fails = True
        print("{name}-{num} FAIL: {challenge}\t\t\t\t{result}".format(
            name=name, num=num, challenge=challenge, result=result))
        report_path = '{name}-{num}.html'.format(name=name, num=num)
        with open(report_path, 'w') as out:
            # The result is wrapped in Markup before being formatted into
            # the template; the challenge is passed as a plain string.
            out.write(test_file_template.format(
                name=name,
                challenge=challenge,
                result=markupsafe.Markup(result)))
 
 # (name, regex pattern) pairs.  The loop further down compiles each pattern
 # and uses it to delete every match from the challenge string.  The two
 # patterns differ only in the character class inside the tag branch:
 # [^>] versus [^<].
 CONTENDERS = [
    ('markupsafe-re', r'(<!--.*?-->|<[^>]*>)'),
    ('broken-re', r'(<!--.*?-->|<[^<]*>)'),
 ]
 
 def remove_html_markup(s):
    tag = False
    quote = False
    out = ""

    for c in s:
            if c == '<' and not quote:
                tag = True
            elif c == '>' and not quote:
                tag = False
            elif (c == '"' or c == "'") and tag:
                quote = not quote
            elif not tag:
                out = out + c

    return out 
 
 
 
 # Run every regex-based contender through the gauntlet.
 for (name, fixer_re) in CONTENDERS:
    print(fixer_re)
    print()
    # fixer_re is rebound from the pattern string to its compiled form.
    fixer_re = re.compile(fixer_re)
    # The lambda looks up the module-level fixer_re when called; gauntlet()
    # runs before the next iteration rebinds it, so each contender uses its
    # own pattern.
    fixer = lambda s: fixer_re.sub('', s)
    gauntlet(name, fixer)
 
 
 # Django's private single-pass helper.
 print("old django")
 print()
 gauntlet('_strip_once', _strip_once)
 
 def fixed_striptags(html):
    """Apply strip_tags repeatedly until its output stops changing.

    Returns the first value that strip_tags leaves unchanged.  In the
    visible script this helper is defined but not called; the "new django"
    run passes strip_tags to gauntlet() directly.
    """
    current = html
    stripped = strip_tags(current)
    while current != stripped:
        current = stripped
        stripped = strip_tags(current)
    return current
 
 
 # Django's public strip_tags.
 print("new django")
 print()
 gauntlet('strip_tags', strip_tags)


 # The hand-written state machine defined above.
 print("@Medeiros remove_html_markup")
 print()
 gauntlet('remove_html_markup', remove_html_markup)

 # Exit status is 1 when any contender failed and 0 otherwise.  SystemExit is
 # raised directly because the exit() builtin is injected by the site module
 # for interactive use and is missing when Python runs with -S.
 raise SystemExit(any_fails)
	#!/usr/bin/env python
	#
	# Half-assed fuzzer for HTML strip-tags functions
	#

	from __future__ import print_function

	# Fragments that make_test_string() picks from at random and concatenates
	# to build each fuzz input.
	BITS = [
	'<', '</', '>', '<!--', '-->', 'a', 'div', 'p', 'i',
	]

	# Number of random inputs generated for TEST_STRINGS.
	how_many_test_strings = 10

	import random
	import re
	import markupsafe
	from django.utils.html import _strip_once, strip_tags

	# HTML page that gauntlet() writes for each failing case; the {name},
	# {challenge} and {result} placeholders are filled in through .format().
	# NOTE(review): because this is a markupsafe.Markup, .format() presumably
	# HTML-escapes plain-str arguments and inserts Markup arguments verbatim --
	# confirm against the MarkupSafe documentation.
	test_file_template = markupsafe.Markup(
	"""
	<html>
	<head><title>--test--</title>
	<body>
	<h1>{name}</h1>
	<big>
	<table width="100%">
	<tr>
	<td>{challenge}</td>
	<td>{result}am i a link?</td>
	</tr>
	</table>
	</big>
	</body>
	</html>
	""")


	def make_test_string(num_bits):
	    """Return `num_bits` randomly chosen BITS fragments joined into one string.

	    With `num_bits` equal to 0 the result is the empty string.
	    """
	    return ''.join([random.choice(BITS) for k in range(num_bits)])


	# A stripped result that still contains a literal "<a>" counts as a failure.
	failure_re = re.compile(r'<a>')
	# Fuzz inputs shared by every contender; each is built from a random number
	# (0 to 74) of BITS fragments.
	TEST_STRINGS = [make_test_string(random.randrange(75)) for k in range(how_many_test_strings)]

	# Set to True by gauntlet() when any contender fails; passed to exit() at
	# the end of the script.
	any_fails = False

	def gauntlet(name, fixer):
	    """Run `fixer` over every entry of TEST_STRINGS and report the failures.

	    A result is a failure when failure_re finds a match in it.  Each failure
	    sets the module-level any_fails flag, prints a FAIL line and writes a
	    file named '<name>-<index>.html' rendered from test_file_template.
	    Passing cases produce no output.

	    name  -- label used in the printed line and in the report file name.
	    fixer -- callable taking the challenge string and returning the
	             stripped string.
	    """
	    global any_fails
	    for num, challenge in enumerate(TEST_STRINGS):
	        result = fixer(challenge)
	        if failure_re.search(result):
	            any_fails = True
	            print("{name}-{num} FAIL: {challenge}\t\t\t\t{result}".format(**locals()))
	            with open('{name}-{num}.html'.format(**locals()), 'w') as out:
	                # Wrap the result in Markup before formatting it into
	                # the template; the challenge stays a plain string.
	                result = markupsafe.Markup(result)
	                out.write(test_file_template.format(**locals()))

	# (name, regex pattern) pairs.  The loop further down compiles each pattern
	# and uses it to delete every match from the challenge string.  Each
	# pattern matches either an HTML comment (non-greedy) or a single tag; the
	# two differ only in the character class inside the tag branch:
	# [^>] versus [^<].
	CONTENDERS = [
	    ('markupsafe-re', r'(<!--.*?-->|<[^>]*>)'),
	    ('broken-re', r'(<!--.*?-->|<[^<]*>)'),
	]

	def remove_html_markup(s):
	tag = False
	quote = False
	out = ""

	for c in s:
	if c == '<' and not quote:
	tag = True
	elif c == '>' and not quote:
	tag = False
	elif (c == '"' or c == "'") and tag:
	quote = not quote
	elif not tag:
	out = out + c

	return out



	# Run every regex-based contender through the gauntlet.
	for (name, fixer_re) in CONTENDERS:
	    print(fixer_re)
	    print()
	    # fixer_re is rebound from the pattern string to its compiled form.
	    fixer_re = re.compile(fixer_re)
	    # The lambda looks up the module-level fixer_re when called; gauntlet()
	    # runs before the next iteration rebinds it, so each contender uses its
	    # own pattern.
	    fixer = lambda s: fixer_re.sub('', s)
	    gauntlet(name, fixer)


	# Django's private single-pass helper.
	print("old django")
	print()
	gauntlet('_strip_once', _strip_once)

	def fixed_striptags(html):
	    """Apply strip_tags repeatedly until its output stops changing.

	    Returns the first value that strip_tags leaves unchanged.
	    """
	    value = html
	    while True:
	        new_value = strip_tags(value)
	        if value == new_value:
	            return value
	        value = new_value


	# Django's public strip_tags.
	print("new django")
	print()
	gauntlet('strip_tags', strip_tags)


	# The hand-written state machine defined above.
	print("@Medeiros remove_html_markup")
	print()
	gauntlet('remove_html_markup', remove_html_markup)

	# Exit status is 1 when any contender failed and 0 otherwise.  SystemExit is
	# raised directly because the exit() builtin is injected by the site module
	# for interactive use and is missing when Python runs with -S.
	raise SystemExit(any_fails)
No results found