Created
June 8, 2021 11:17
-
-
Save joereddington/358ea23802c5a2e2388049dc7693dc85 to your computer and use it in GitHub Desktop.
Working out what encode/decode really do.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
# -*- coding: utf-8 -*-
from unittest import TestCase | |
import unittest | |
import io | |
class unicodeTest(TestCase):
    """Experiments working out what ``encode`` and ``decode`` really do.

    The notes were written against Python 2, where a plain quoted literal is
    a byte string and a ``u""`` literal is text.  Every statement is spelled
    so that it also parses and passes on Python 3, and each observation is
    pinned with a real assertion rather than only being printed.
    """

    def test_ascii_start(self):
        """Encode (and, where available, decode) a pure-ASCII text string."""
        uni_str = u'hello'
        print(uni_str)
        # Only Python 2's unicode type has decode(); Python 3 text does not,
        # so the call is skipped there instead of raising AttributeError.
        if hasattr(uni_str, "decode"):
            print(uni_str.decode())
            self.assertEqual(uni_str.decode(), u'hello')
        encoded = uni_str.encode()
        print(encoded)
        # ASCII-only text encodes to the same characters as bytes.
        self.assertEqual(encoded, b'hello')

    def test_complex_start(self):
        """Encode text holding a non-ASCII character as UTF-8 and as ASCII."""
        uni_str = u'hellø'
        print(uni_str)
        # encode's argument names the target encoding of the bytes produced.
        encoded = uni_str.encode("utf8")
        print(encoded)
        self.assertEqual(encoded, b'hell\xc3\xb8')
        # With "ascii" there is no representation for the last character, so
        # the default 'strict' error handling raises.
        with self.assertRaises(UnicodeEncodeError):
            uni_str.encode("ascii")

    # str.encode([encoding[, errors]])
    #
    # Return an encoded version of the string. Default encoding is the
    # current default string encoding. errors may be given to set a different
    # error handling scheme. The default for errors is 'strict', meaning that
    # encoding errors raise a UnicodeError. Other possible values are
    # 'ignore', 'replace', 'xmlcharrefreplace', 'backslashreplace' and any
    # other name registered via codecs.register_error(), see section Codec
    # Base Classes. For a list of possible encodings, see section Standard
    # Encodings.

    def test_emoji(self):
        """Compare the length of accented text with its UTF-8 encoding.

        Despite the name, the sample is an accented letter, not an emoji.
        """
        uni_st = u"à"
        print(len(uni_st))
        encoded_str = uni_st.encode('utf8')
        print("({})".format(encoded_str))
        # Non-ASCII text needs more UTF-8 bytes than it has characters, and
        # decoding those bytes gives the original text back.
        self.assertGreater(len(encoded_str), len(uni_st))
        self.assertEqual(encoded_str.decode('utf8'), uni_st)

    def test_writing_a_file(self):
        """Create files whose names contain non-ASCII characters.

        One name is built from a plain literal and one from a ``u""``
        literal.  The files are left in the current working directory.
        """
        # On Python 2 this plain literal is a byte string, not unicode.
        filename = "Ελπίζω"
        print(type(filename))
        filename_u = u"Ελπίζω"
        print(type(filename_u))
        # Both print fine -- presumably because the terminal is UTF-8.
        print(filename)
        print(filename_u)
        message = "Now the file has more content!"
        for path in (filename + "s.txt", filename_u + "u.txt"):
            # 'with' closes the handle even if write() raises.
            with open(path, "w") as handle:
                handle.write(message)
            with open(path) as handle:
                self.assertEqual(handle.read(), message)

    def test_writing_inside_a_file(self):
        """Write non-ASCII text as file *content*, via ``open`` and ``io.open``.

        The files are left in the current working directory.
        """
        # On Python 2 this plain literal is a byte string, not unicode.
        filename = "Ελπίζω"
        print(type(filename))
        filename_u = u"Ελπίζω"
        print(type(filename_u))
        # Both print fine -- presumably because the terminal is UTF-8.
        print(filename)
        print(filename_u)
        expected_bytes = filename_u.encode("utf-8")

        native_path = filename + "si.txt"
        with open(native_path, "w") as handle:
            handle.write(filename)
        # NOTE(review): on Python 3 this relies on the default text encoding
        # being UTF-8; on Python 2 the literal's bytes are written as-is.
        with io.open(native_path, "rb") as handle:
            self.assertEqual(handle.read(), expected_bytes)

        text_path = filename_u + "ui.txt"
        # Explicit encoding: io.open would otherwise fall back to the locale's
        # preferred encoding, which may be unable to represent Greek text.
        with io.open(text_path, "w", encoding="utf-8") as handle:
            handle.write(filename_u)
        with io.open(text_path, "rb") as handle:
            self.assertEqual(handle.read(), expected_bytes)
# Run every test in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment