Created
July 27, 2014 11:15
-
-
Save loic/27ac306c3ebfc50ea6de to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/django/utils/encoding.py b/django/utils/encoding.py | |
index 6a2f877..21712bc 100644 | |
--- a/django/utils/encoding.py | |
+++ b/django/utils/encoding.py | |
@@ -1,3 +1,4 @@ | |
+# -*- encoding: utf-8 -*- | |
from __future__ import unicode_literals | |
import codecs | |
@@ -188,7 +189,9 @@ def iri_to_uri(iri): | |
assuming input is either UTF-8 or unicode already, we can simplify things a | |
little from the full method. | |
- Returns an ASCII string containing the encoded result. | |
+ Takes an IRI in UTF-8 bytes (e.g. '/I \xe2\x99\xa5 Django/') or unicode | |
+ (e.g. '/I ♥ Django/') and returns ASCII bytes containing the encoded result | |
+ (e.g. '/I%20%E2%99%A5%20Django/'). | |
""" | |
# The list of safe characters here is constructed from the "reserved" and | |
# "unreserved" characters specified in sections 2.2 and 2.3 of RFC 3986: | |
@@ -213,7 +216,9 @@ def uri_to_iri(uri): | |
Resource Identifier(IRI). | |
This is the algorithm from section 3.2 of RFC 3987. | |
- Returns a valid IRI utf-8 encoded bytes. | |
+ | |
+ Takes a URI in ASCII bytes (e.g. '/I%20%E2%99%A5%20Django/') and returns | |
+ UTF-8 bytes containing the encoded result (e.g. '/I \xe2\x99\xa5 Django/'). | |
""" | |
if uri is None: | |
return uri | |
diff --git a/tests/utils_tests/test_encoding.py b/tests/utils_tests/test_encoding.py | |
index fc3bc24..7f16d96 100644 | |
--- a/tests/utils_tests/test_encoding.py | |
+++ b/tests/utils_tests/test_encoding.py | |
@@ -40,12 +40,6 @@ class TestEncodingUtils(unittest.TestCase): | |
today = datetime.date.today() | |
self.assertEqual(force_bytes(today, strings_only=True), today) | |
- def test_filepath_to_uri(self): | |
- self.assertEqual(filepath_to_uri('upload\\чубака.mp4'), | |
- 'upload/%D1%87%D1%83%D0%B1%D0%B0%D0%BA%D0%B0.mp4') | |
- self.assertEqual(filepath_to_uri('upload\\чубака.mp4'.encode('utf-8')), | |
- 'upload/%D1%87%D1%83%D0%B1%D0%B0%D0%BA%D0%B0.mp4') | |
- | |
@unittest.skipIf(six.PY3, "tests a class not defining __str__ under Python 2") | |
def test_decorated_class_without_str(self): | |
with self.assertRaises(ValueError): | |
@@ -53,65 +47,71 @@ class TestEncodingUtils(unittest.TestCase): | |
class NoStr(object): | |
pass | |
- def test_iri_to_uri(self): | |
- self.assertEqual(iri_to_uri('red%09ros\xe9#red'), | |
- 'red%09ros%C3%A9#red') | |
- self.assertEqual(iri_to_uri('/blog/for/J\xfcrgen M\xfcnster/'), | |
- '/blog/for/J%C3%BCrgen%20M%C3%BCnster/') | |
+class TestRFC3987IEncodingUtils(unittest.TestCase): | |
+ | |
+ def test_filepath_to_uri(self): | |
+ self.assertEqual(filepath_to_uri('upload\\чубака.mp4'), | |
+ 'upload/%D1%87%D1%83%D0%B1%D0%B0%D0%BA%D0%B0.mp4') | |
+ self.assertEqual(filepath_to_uri('upload\\чубака.mp4'.encode('utf-8')), | |
+ 'upload/%D1%87%D1%83%D0%B1%D0%B0%D0%BA%D0%B0.mp4') | |
+ | |
+ def test_iri_to_uri(self): | |
+ cases = [ | |
+ # Valid UTF-8 sequences are encoded. | |
+ ('red%09rosé#red', 'red%09ros%C3%A9#red'), | |
+ ('/blog/for/Jürgen Münster/', '/blog/for/J%C3%BCrgen%20M%C3%BCnster/'), | |
+ ('locations/%s' % urlquote_plus('Paris & Orléans'), 'locations/Paris+%26+Orl%C3%A9ans'), | |
- self.assertEqual(iri_to_uri('locations/%s' % urlquote_plus('Paris & Orl\xe9ans')), | |
- 'locations/Paris+%26+Orl%C3%A9ans') | |
+ # Reserved chars remain unescaped. | |
+ ('%&', '%&'), | |
+ ('red&♥ros%#red', 'red&%E2%99%A5ros%#red'), | |
+ ] | |
- def test_iri_to_uri_idempotent(self): | |
- self.assertEqual(iri_to_uri(iri_to_uri('red%09ros\xe9#red')), | |
- 'red%09ros%C3%A9#red') | |
+ for iri, uri in cases: | |
+ self.assertEqual(iri_to_uri(iri), uri) | |
- def test_iri_to_uri_reserved(self): | |
- self.assertEqual(iri_to_uri('%&'), '%&') | |
- self.assertEqual(iri_to_uri('red&♥ros%#red'), | |
- 'red&%E2%99%A5ros%#red') | |
+ # Test idempotency. | |
+ self.assertEqual(iri_to_uri(iri_to_uri(iri)), uri) | |
def test_uri_to_iri(self): | |
- def test(uri): | |
- return uri_to_iri(uri).decode('utf-8') | |
- | |
- self.assertEqual(test('~%A9helloworld'), '~%A9helloworld') | |
- self.assertEqual(test('d%AAo%AAw%AAn%AAl%AAo%AAa%AAd%AA/'), 'd%AAo%AAw%AAn%AAl%AAo%AAa%AAd%AA/') | |
- self.assertEqual(test('/%E2%99%E2%99%A5/'), '/%E2%99♥/') | |
- self.assertEqual(test('/%E2%99%A5'), '/♥') | |
- self.assertEqual(test('/%E2%98%80%E2%99%A5/'), '/☀♥/') | |
- self.assertEqual(test('/%E2%98%8E%E2%A9%E2%99%A5/'), '/☎%E2%A9♥/') | |
- self.assertEqual(test('/%E2%99%BF%99☃%E2%99%A3%E2%98%BD%A9'), '/♿%99☃♣☽%A9') | |
- self.assertEqual(test('/%E2%98%90/fred?utf8=%E2%9C%93'), '/☐/fred?utf8=✓') | |
- self.assertEqual(test('/☐/fred?utf8=☓'), '/☐/fred?utf8=☓') | |
- self.assertEqual(test('/üsername'), '/üsername') | |
- self.assertEqual(test('/üser:pässword@☃'), '/üser:pässword@☃') | |
- self.assertEqual(test('/%3Fmeh?foo=%26%A9'), '/?meh?foo=&%A9') | |
- self.assertEqual(test('/%E2%A8%87%87%A5%E2%A8%A0'), '/⨇%87%A5⨠') | |
- self.assertEqual(test('/你好'), '/你好') | |
- | |
- def test_uri_to_iri_idempotent(self): | |
- def test(uri): | |
- return uri_to_iri(uri).decode('utf-8') | |
- | |
- self.assertEqual(test(test('~%A9helloworld')), '~%A9helloworld') | |
- self.assertEqual(test(test('d%AAo%AAw%AAn%AAl%AAo%AAa%AAd%AA/')), 'd%AAo%AAw%AAn%AAl%AAo%AAa%AAd%AA/') | |
- self.assertEqual(test(test('/%E2%99%E2%99%A5/')), '/%E2%99♥/') | |
- self.assertEqual(test(test('/%E2%99%A5')), '/♥') | |
- | |
- def test_complementary(self): | |
- def test_iri_to_iri(iri): | |
+ cases = [ | |
+ # Valid UTF-8 sequences are decoded. | |
+ ('/%E2%99%A5%E2%99%A5/', '/♥♥/'), | |
+ ('/%E2%99%A5%E2%99%A5/?utf8=%E2%9C%93', '/♥♥/?utf8=✓'), | |
+ | |
+ # Broken UTF-8 sequences remain escaped. | |
+ ('/%AAd%AAj%AAa%AAn%AAg%AAo%AA/', '/%AAd%AAj%AAa%AAn%AAg%AAo%AA/'), | |
+ ('/%E2%99%A5%E2%E2%99%A5/', '/♥%E2♥/'), | |
+ ('/%E2%99%A5%E2%99%E2%99%A5/', '/♥%E2%99♥/'), | |
+ ('/%E2%E2%99%A5%E2%99%A5%99/', '/%E2♥♥%99/'), | |
+ ('/%E2%99%A5%E2%99%A5/?utf8=%9C%93%E2%9C%93%9C%93', '/♥♥/?utf8=%9C%93✓%9C%93'), | |
+ ] | |
+ | |
+ for uri, iri in cases: | |
iri = iri.encode('utf-8') | |
- self.assertEqual(uri_to_iri(iri_to_uri(iri)), iri) | |
- def test_uri_to_uri(uri): | |
+ self.assertEqual(uri_to_iri(uri), iri) | |
+ | |
+ # Test idempotency. | |
+ self.assertEqual(uri_to_iri(uri_to_iri(uri)), iri) | |
+ | |
+ def test_complementarity(self): | |
+ cases = [ | |
+ ('/blog/for/J%C3%BCrgen%20M%C3%BCnster/', '/blog/for/J\xfcrgen M\xfcnster/'), | |
+ ('%&', '%&'), | |
+ ('red&%E2%99%A5ros%#red', 'red&♥ros%#red'), | |
+ ('/%E2%99%A5%E2%99%A5/', '/♥♥/'), | |
+ ('/%E2%99%A5%E2%99%A5/?utf8=%E2%9C%93', '/♥♥/?utf8=✓'), | |
+ ('/%AAd%AAj%AAa%AAn%AAg%AAo%AA/', '/%AAd%AAj%AAa%AAn%AAg%AAo%AA/'), | |
+ ('/%E2%99%A5%E2%E2%99%A5/', '/♥%E2♥/'), | |
+ ('/%E2%99%A5%E2%99%E2%99%A5/', '/♥%E2%99♥/'), | |
+ ('/%E2%E2%99%A5%E2%99%A5%99/', '/%E2♥♥%99/'), | |
+ ('/%E2%99%A5%E2%99%A5/?utf8=%9C%93%E2%9C%93%9C%93', '/♥♥/?utf8=%9C%93✓%9C%93'), | |
+ ] | |
+ | |
+ for uri, iri in cases: | |
+ iri = iri.encode('utf-8') | |
self.assertEqual(iri_to_uri(uri_to_iri(uri)), uri) | |
- | |
- test_iri_to_iri('~%A9helloworld') | |
- test_iri_to_iri('/üser:pässword@☃') | |
- test_iri_to_iri('/你好') | |
- | |
- test_uri_to_uri('~%A9helloworld') | |
- test_uri_to_uri('/%E2%99%A5') | |
- test_uri_to_uri('/%E2%98%80%E2%99%A5') | |
+ self.assertEqual(uri_to_iri(iri_to_uri(iri)), iri) | |
+# | |
\ No newline at end of file |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment