Last active
April 28, 2024 06:51
-
-
Save thluiz/4751212 to your computer and use it in GitHub Desktop.
Just a short python script to convert a file from one encoding to another
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding=utf-8
"""
Convert a file from one text encoding to another.

Arguments are separated by spaces (no commas) on the command line:

usage: python file_encoder.py 'source encode' 'target encode'
       'source file' 'target'

example: python file_encoder.py 'iso8859-1' 'utf-8' 'file.html'
         'file-utf8.html'

Exit status: 0 on success, 1 if the conversion fails, 2 on a usage error.
"""
import codecs
import io
import os
import sys

# Maximum number of characters decoded per read, so large files are
# streamed instead of being loaded into memory at once.
BLOCK = 1048576

USAGE = (
    "usage: file_encoder.py 'source encode', 'target encode', "
    "'source file', 'target' ")


def convert_file(source_name, target_name, source_encoding, target_encoding,
                 block_size=BLOCK):
    """Re-encode the text in *source_name* and write it to *target_name*.

    Args:
        source_name: Path of the file to read.
        target_name: Path of the file to create or overwrite.
        source_encoding: Codec name used to decode the source file.
        target_encoding: Codec name used to encode the target file.
        block_size: Maximum number of characters to read per chunk.

    Raises:
        LookupError: If either encoding name is not a known codec.
        ValueError: If source and target resolve to the same path, or
            (as UnicodeError) if the text cannot be decoded or encoded.
        IOError/OSError: If a file cannot be opened, read or written.

    If decoding or encoding fails part-way through, the target file is
    left containing only the chunks written so far.
    """
    # Validate both codec names before touching the filesystem: opening
    # the target with "w" truncates it, so a misspelled encoding must be
    # rejected first.
    codecs.lookup(source_encoding)
    codecs.lookup(target_encoding)

    # Opening the target for writing would empty the source before it is
    # read if both names point at the same file.
    if os.path.realpath(source_name) == os.path.realpath(target_name):
        raise ValueError(
            "source and target are the same file: %r" % (source_name,))

    # newline="" disables newline translation on both sides, so line
    # endings pass through exactly as they appear in the source.
    with io.open(source_name, "r", encoding=source_encoding,
                 newline="") as source_file:
        with io.open(target_name, "w", encoding=target_encoding,
                     newline="") as target_file:
            while True:
                contents = source_file.read(block_size)
                if not contents:
                    break
                target_file.write(contents)


def main(argv=None):
    """Run the command-line interface and return the process exit status.

    Args:
        argv: Full argument list including the script name in position 0;
            defaults to ``sys.argv``.

    Returns:
        0 on success, 1 if the conversion failed, 2 on a usage error.
    """
    if argv is None:
        argv = sys.argv

    # argv[0] is the script itself, followed by the four real arguments.
    if len(argv) != 5:
        sys.stderr.write(USAGE + "\n")
        return 2

    source_encode, target_encode, source_file_name, target_file_name = argv[1:]

    try:
        convert_file(source_file_name, target_file_name,
                     source_encode, target_encode)
    except (EnvironmentError, LookupError, ValueError) as err:
        sys.stderr.write("file_encoder.py: %s\n" % (err,))
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment