Skip to content

Instantly share code, notes, and snippets.

@mstriemer
Created June 6, 2014 15:25
Show Gist options
  • Save mstriemer/70eac0e13f05c1950404 to your computer and use it in GitHub Desktop.
Save mstriemer/70eac0e13f05c1950404 to your computer and use it in GitHub Desktop.
Test which Unicode characters survive in SpiderMonkey
# Run me from the root of mozilla/app-validator (requires updates from pull #27).
import json
import re
import subprocess
import sys
from tempfile import NamedTemporaryFile
from appvalidator import unicodehelper
# Value passed as ``js_safe=`` to unicodehelper.decode() by encode_char().
# Flip it to True to check whether the characters survive with that option
# enabled.
ESCAPE_JS = False
def make_char(i):
    """Return the unicode string for the ``\\uNNNN`` escape built from *i*.

    *i* is rendered in decimal, zero-padded to four digits, and those
    digits are then read as the hexadecimal code point of a ``\\u``
    escape, so ``make_char(41)`` is ``u'\\u0041'`` (``u'A'``).

    NOTE(review): because the digits come from a decimal number, code
    points whose hex form contains a-f (including U+000A and U+000D) are
    never produced. The calling script stores one character per line in a
    temp file, so confirm that format still works before changing this to
    a plain code-point lookup.
    """
    # Decode the escape sequence with the codec instead of eval()-ing
    # generated source code; the encode/decode pair accepts both byte
    # strings and text strings.
    escape = '\\u{code:0>4}'.format(code=i)
    return escape.encode('ascii').decode('unicode_escape')
def encode_char(char):
    """Return the JSON encoding of *char* after running it through decode.

    *char* is first handed to ``unicodehelper.decode`` with ``js_safe``
    taken from the module-level ``ESCAPE_JS`` switch; the result of that
    call is then serialized with ``json.dumps``.
    """
    decoded = unicodehelper.decode(char, js_safe=ESCAPE_JS)
    return json.dumps(decoded)
# Range of inputs handed to make_char(); END is exclusive.
START = 0
END = 10000

# Write every generated character to a UTF-8 temp file, one per line, and
# read the lines back as the list of characters under test.
with NamedTemporaryFile() as f:
    for i in xrange(START, END):
        char = make_char(i)
        f.write(char.encode('utf-8'))
        f.write('\n')
    f.flush()
    # Read the file back while still inside the outer ``with``: the temp
    # file is removed as soon as it is closed.
    with open(f.name) as written:
        chars = written.read().splitlines()
print("There are {l} chars".format(l=len(chars)))

# Program run by the JS shell: parse each stdin line as JSON and print it
# re-serialized, one result per line.
js_script = """
var line = readline();
while (line !== null) {
print(JSON.stringify(JSON.parse(line)));
line = readline();
}"""

encoded_chars = [encode_char(char) for char in chars]
failed = []

cmd = ['/usr/local/bin/js', "-e", js_script]
shell_obj = subprocess.Popen(
    cmd, shell=False, stdin=subprocess.PIPE, stderr=subprocess.PIPE,
    stdout=subprocess.PIPE)
data, stderr = shell_obj.communicate('\n'.join(encoded_chars))
out_lines = data.splitlines()

# Pair each input character with its output line. zip() stops at the
# shorter sequence, so a truncated (or over-long) shell output cannot raise
# IndexError; the mismatch is reported separately below.
for i, (char, out_char) in enumerate(zip(chars, out_lines)):
    try:
        json.loads(out_char)
    except ValueError:
        # The shell's output for this character is no longer valid JSON.
        failed.append(char)
        print(u'char {i} {passed} ({char!r})'.format(
            i=i, char=char, passed='failed'))

# If the shell aborted or returned a different number of lines, only part
# of the input was actually checked. Say so instead of claiming success.
incomplete = shell_obj.returncode != 0 or len(out_lines) != len(chars)
if incomplete:
    sys.stderr.write(
        'js exited with status {code} and returned {n} of {l} lines\n'.format(
            code=shell_obj.returncode, n=len(out_lines), l=len(chars)))
    if stderr:
        sys.stderr.write(stderr)

if failed:
    print(failed)
    print('{l} failed'.format(l=len(failed)))
elif not incomplete:
    print('all passed')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment