Created
December 15, 2014 17:01
-
-
Save soeirosantos/974ff73854beddda9541 to your computer and use it in GitHub Desktop.
Convert all files in a specified directory to UTF-8. Warning: make a backup of the directory before running the script ;)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
sources: | |
http://stackoverflow.com/questions/191359/how-to-convert-a-file-to-utf-8-in-python | |
http://stackoverflow.com/questions/2212643/python-recursive-folder-read | |
''' | |
import os, sys, codecs | |
def convert(filepath, source_encoding="iso-8859-1", target_encoding="utf-8"):
    """Re-encode the file at ``filepath`` in place.

    The original is renamed to a backup beside it, decoded from
    ``source_encoding`` and rewritten at ``filepath`` in ``target_encoding``
    one block at a time. The backup is deleted once the rewrite succeeds. If
    anything fails, the partial output is discarded and the original is moved
    back to ``filepath`` before the exception is re-raised.

    Args:
        filepath: Path of the file to convert.
        source_encoding: Codec used to decode the existing content.
        target_encoding: Codec used to encode the rewritten content.

    Raises:
        OSError: If the file cannot be renamed, read, written or removed.
        UnicodeError: If the content cannot be decoded or encoded with the
            given codecs.
    """
    # Size passed to read() so large files are never loaded in one piece.
    BLOCKSIZE = 1048576

    # Choose a backup name that is not already taken: renaming onto an
    # existing "<file>.old" would overwrite it and later delete it.
    old_filepath = filepath + ".old"
    attempt = 0
    while os.path.lexists(old_filepath):
        attempt += 1
        old_filepath = "%s.old.%d" % (filepath, attempt)

    os.rename(filepath, old_filepath)
    try:
        with codecs.open(old_filepath, "r", source_encoding) as source_file:
            with codecs.open(filepath, "w", target_encoding) as target_file:
                # read() returns an empty string at end of file.
                for contents in iter(lambda: source_file.read(BLOCKSIZE), ""):
                    target_file.write(contents)
    except BaseException:
        # Roll back on any failure (including Ctrl-C): drop the partial
        # output and put the untouched original back, then re-raise.
        if os.path.lexists(filepath):
            os.remove(filepath)
        os.rename(old_filepath, filepath)
        raise
    os.remove(old_filepath)
def walk(walk_dir):
    """Run ``convert`` on every file beneath ``walk_dir``, subfolders included.

    Args:
        walk_dir: Directory to traverse; it is made absolute before walking.
    """
    top = os.path.abspath(walk_dir)
    # Lazy generator: each directory is listed only when the walk reaches it,
    # so files are converted in the same order as a nested loop would.
    file_paths = (
        os.path.join(current_dir, name)
        for current_dir, _subdirs, names in os.walk(top)
        for name in names
    )
    for path in file_paths:
        convert(path)
if __name__ == '__main__':
    # The directory to convert is the first command-line argument; exit with
    # a usage message instead of an IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit("usage: %s DIRECTORY" % sys.argv[0])
    walk(sys.argv[1])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment