Last active
August 29, 2015 14:01
-
-
Save evansd/41ea9dfc90d87f6afde1 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import absolute_import

import json
import re

from django import template
from django.utils.html import format_html_join
from django.utils.safestring import mark_safe
# Template library instance on which this module's tags are registered
register = template.Library()
# Note: this is much more restrictive than the actual rules for
# JavaScript variables, but we prefer to err on the side of
# caution here.
# The pattern is anchored with `\Z` rather than `$` because `$` also matches
# just before a trailing newline, which would let a name such as "foo\n"
# pass validation.
JS_VARIABLE_RE = re.compile(r'^[a-z_\$][0-9a-z_\$]*\Z', re.IGNORECASE)

# Use a regex to do character replacements so we can do them in a single pass.
# U+2028 and U+2029 may appear unescaped inside JSON strings but are line
# terminators in JavaScript source (string literals only accept them raw
# since ES2019), so they are replaced with their escape sequences as well.
REPLACEMENTS = {
    u'<': r'\u003c',
    u'&': r'\u0026',
    u'>': r'\u003e',
    u'\u2028': r'\u2028',
    u'\u2029': r'\u2029',
}
REPLACE_RE = re.compile(u'|'.join(map(re.escape, REPLACEMENTS.keys())))
def replace(match):
    """
    Return the escape sequence that REPLACEMENTS maps the matched text to

    Intended for use as the replacement callback of `REPLACE_RE.sub`, so
    `match` is a regex match object whose whole match is a key of
    REPLACEMENTS.
    """
    return REPLACEMENTS[match.group(0)]
def escape_json_for_script(json_string):
    """
    Escape a JSON string so that it can be safely included in a `<script>`
    element in an HTML or XHTML document

    Escaped sequences are based on the recommendations here:
    http://www.w3.org/TR/html5/scripting-1.html#restrictions-for-contents-of-script-elements
    https://www.owasp.org/index.php/XSS_(Cross_Site_Scripting)_Prevention_Cheat_Sheet

        < : should be escaped in all contexts
        & : only relevant for XHTML documents
        > : only relevant within CDATA blocks or HTML comments

    As these characters can only occur within strings in JSON, we can safely
    replace them with Unicode escape sequences.

    Returns a copy of `json_string` in which every match of REPLACE_RE has
    been substituted with its entry in REPLACEMENTS.
    """
    # Single pass over the input: `replace` looks up each matched character
    return REPLACE_RE.sub(replace, json_string)
@register.simple_tag(name='json')
def json_encode(data, var=None, indent=None, **kwargs):
    """
    Embed `data` as JSON encoded string with a `<script>` element of type
    `application/json` or, if 'var' is supplied, as a global JavaScript
    variable

    Any extra keyword arguments are added as attributes on the element
    (underscores in keyword names are replaced with dashes). A `type`
    attribute is always set by this tag and overrides any supplied one.

    `indent` is passed straight through to `json.dumps`.

    Raises ValueError if `var` is supplied but does not match
    JS_VARIABLE_RE.

    The returned markup is marked safe: attribute names and values are
    escaped by `format_html_join`, the JSON payload is escaped by
    `escape_json_for_script` and `var` is validated. Without this,
    `simple_tag` would HTML-escape the whole `<script>` element when
    autoescaping is active.
    """
    encoded = json.dumps(data, indent=indent)
    safe_encoded = escape_json_for_script(encoded)
    attrs = {key.replace('_', '-'): value for (key, value) in kwargs.items()}
    if var is not None:
        if not JS_VARIABLE_RE.match(var):
            raise ValueError(u'Invalid JavaScript variable name: %s' % var)
        attrs['type'] = 'application/javascript'
        script_template = u'<script {attrs}>var {var} = {data};</script>'
    else:
        attrs['type'] = 'application/json'
        script_template = u'<script {attrs}>{data}</script>'
    # Named `script_template` so the imported `template` module is not shadowed
    markup = script_template.format(
        attrs=format_html_join(u' ', u'{0}="{1}"', attrs.items()),
        data=safe_encoded,
        var=var)
    return mark_safe(markup)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@piquadrat You're absolutely right. Hex escapes aren't valid JSON, although they are valid JavaScript, so if you're "parsing" the JSON by interpreting it as JavaScript then it happens to work.
Thanks for spotting this!