Skip to content

Instantly share code, notes, and snippets.

@nitely
Last active February 26, 2016 02:43
Show Gist options
  • Save nitely/73fb077b1f2482cdb077 to your computer and use it in GitHub Desktop.
Save nitely/73fb077b1f2482cdb077 to your computer and use it in GitHub Desktop.
URL Sanitizer
# URL schemes that sanitize_url lets through; a URL with any other scheme is
# replaced by an empty string.
SAFE_PROTOCOLS = {'http', 'https'}
def escape(text):
    """Return *text* with HTML-significant characters replaced by entities.

    Escapes ``&``, ``<``, ``>``, ``"`` and ``'``. The ampersand is handled
    first so that the ``&`` introduced by the later replacements is not
    escaped a second time.
    """
    return (text
            .replace('&', '&amp;')
            .replace('<', '&lt;')
            .replace('>', '&gt;')
            .replace('"', '&quot;')
            .replace("'", '&#39;'))
def sanitize_url(url):
    """Escape *url* for HTML output and blank it if its scheme is not allowed.

    The URL is HTML-escaped first. If the escaped text contains no ``:`` it
    is returned as-is (no protocol). Otherwise the text before the first
    ``:`` is treated as the scheme and must be in ``SAFE_PROTOCOLS``; the
    comparison ignores case. Any other URL containing a ``:`` yields ``''``.
    """
    url = escape(url)  # & -> &amp; ...
    scheme, colon, _ = url.partition(':')
    if not colon:  # No protocol
        return url
    # Schemes are case-insensitive, so 'HTTP://...' is as acceptable as
    # 'http://...'; unsafe schemes stay rejected whatever their casing.
    if scheme.lower() in SAFE_PROTOCOLS:
        return url
    return ''  # or prepend http:// to url
def test_safe_links():
    """Check sanitize_url against a list of known attack vectors."""
    cases = [
        # plain javascript: pseudo protocol
        ('javascript:alert`1`', ''),
        # mixed-case variant used as a bypass attempt
        ('jAvAsCrIpT:alert`1`', ''),
        # colon written as an HTML entity
        ('javascript&colon;alert`1`', 'javascript&amp;colon;alert`1`'),
        # control-character prefix (dangerous in Chrome)
        ('\x1Ajavascript:alert`1`', ''),
        # data: URI (dangerous in Firefox)
        ('data:text/html,<script>alert`1`</script>', ''),
        # vbscript: URI (dangerous in Internet Explorer)
        ('vbscript:msgbox', ''),
        # attempt to break out of the attribute
        ('"<>', '&quot;&lt;&gt;'),
    ]
    for payload, wanted in cases:
        result = sanitize_url(payload)
        assert wanted == result
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment