|
import sys |
|
import os |
|
import argparse |
|
|
|
# Prompt text written into a newly created file (see the --create handling
# below): an English line followed by the same request in Japanese.
placeholder = (
    'Paste mojibake here\n'
    'ここで文字化けをペースト下さい'
)

# Encoding used for the source text when --from is not given.
default_source_enc = 'sjis'
|
|
|
def write_file(filename, encoding, text):
    """Overwrite ``filename`` with ``text`` encoded in ``encoding``.

    The file is opened in binary mode so exactly the encoded bytes are
    written, with no platform newline translation.  A single trailing
    newline is appended to the text *before* encoding, so it is represented
    correctly in encodings that are not ASCII-compatible (e.g. UTF-16).

    Args:
        filename: Path of the file to create or truncate.
        encoding: Name of the codec used to encode ``text``.
        text: The string to write.

    Raises:
        LookupError: If ``encoding`` is not a known codec.
        UnicodeEncodeError: If ``text`` cannot be represented in ``encoding``.
        OSError: If the file cannot be opened or written.
    """
    # Encode first: opening with 'wb' truncates the file, so a failing
    # encode must happen before the existing contents are destroyed.
    payload = (text + '\n').encode(encoding)
    with open(filename, 'wb') as f:
        f.write(payload)
|
|
|
def read_file(filename, encoding):
    """Return the entire contents of ``filename`` as a string.

    The file is opened in text mode and decoded with ``encoding``.
    """
    with open(filename, mode='r', encoding=encoding) as source:
        contents = source.read()
    return contents
|
|
|
if __name__ == '__main__':
    # Only the script entry point launches an external program, so the
    # import is kept local to it.
    import subprocess

    parser = argparse.ArgumentParser(description='Fix mojibake characters')
    parser.add_argument('filename', metavar='file',
                        help='file to convert')
    parser.add_argument('--create', dest='create_file', action='store_true',
                        help='create the file to paste mojibake text into, and open it in TextEdit')
    parser.add_argument('--overwrite', dest='overwrite_file', action='store_true',
                        help='(only with --create) overwrite file')
    parser.add_argument('--from', dest='from_encoding',
                        metavar='source_encoding',
                        default=default_source_enc,
                        help=f'source encoding (default: {default_source_enc})')

    args = parser.parse_args()

    from_encoding = args.from_encoding
    # The output is not re-encoded explicitly; it is printed as a str below.
    filename = args.filename
    create_file = args.create_file
    overwrite_file = args.overwrite_file

    if create_file:
        # NOTE(review): the nesting of this section was reconstructed from
        # the help texts; confirm it matches the intended workflow.
        if os.path.exists(filename) and not overwrite_file:
            # Keep the existing file unless --overwrite was requested.
            print('File already exists, skipping...')
        else:
            # 1. Write placeholder text to file, in original encoding
            print('Writing placeholder text to file')
            write_file(filename, from_encoding, placeholder)

        # 2. Paste manually into TextEdit
        print('Please paste the text manually into TextEdit')
        print('Press enter when ready to proceed')
        # Pass the arguments as a list (no shell) so file names containing
        # spaces or shell metacharacters are handled safely.  A failure to
        # launch the editor is not fatal, hence check=False.
        subprocess.run(['open', '-a', 'TextEdit', filename], check=False)
        input()

    # 3. Output correctly-decoded file contents
    # The text is printed as a Python str; the bytes actually emitted depend
    # on the encoding of stdout (presumably UTF-8 on most terminals).
    converted = read_file(filename, from_encoding)
    print(converted)
癱挴慤慴) ༃�袙톯Ɠ ᙰ 険땓逕碬ꮚ㼩맗袜抝ꛆ⎧㡠� ; ָ � ø Ĥ Ĥ Ĥ - Ĥ ǘ � Ǡ ǰ ǰ ǰ ǰ ǰ ǰ ǰ : : ጐ ǰ ɐ ʠ ̘ ΐ ϰ р Ґ Ә Ԡ ը ȓ ȓ Ƞ Ã ã ȳ ɓ ɣ ɳ ʃ ʓ ʣ ɀ ȳ ʳ ˃ ˓ ˣ ˳ ̃ ɀ C ̐ ̠ C ̐ ̰ C ̐ ̀ s ͐ ͠ s ͐ Ͱ s Ő s Ű Ń s ͐ ΐ 뿤 뿬@ � 8 8 � -P - .ቌĮȺܘ̮ޞ࠘Ȑ࢚ň᠆Ȇ @ � 8 8 � � .P . Юᡴဇ鸃᠇Ȇ H ,
8 8 � � 0P 0 + 1 Ԯܘخఘܮ౼ഘർਘमఘਮ౼ഘମർଘದȇ᠊Ȇ H , � 8 8 � � 2P 2 + 3 മܘฮఘ༮౼ഘီർਘᄮఘሮ౼ഘጮർଘᒦȇ᠊Ȇ @ � � 8 8 � 8P 8 ᔮᘺܘᠮ᤺ᨺހ࠘ᬮᴺࢀؘ� @ � 8 8 � C� C ḮἺ›ؘ� @ � 8 8 � B� B ℮∺⌺ؘ� @ � 8 8 � ;� ; ┺ؘ�@ � 8 8 � <� < ☮✺ؘ�@ � 8 8 � =� = ⠮⤺ؘ�@ � 8 8 � >� > ⨮⬺ⰺؘ� ڨ ۈ ۰ ܰ ݸ ޘ ߘ ࠈ ࠰ ࡐ ࢰ ࣰ न ঀ স ਐ ਸ ઐ ૠ ଈ ର ୰ ఈ ౸ ೀ ഠ ൈ ൸ ච ฐ ๘ ຈ ຸ ༠ མ ྨ ࿘ ဈ ၀ ၰ Ⴈ ა ᄈ ᅘ ᆨ ᇐ ሀ ረ ቐ ኀ � � � � QtQuick � � � QtQuick.Controls � � � QtQuick.Controls.impl � � � T � � � QtQuick.Templates � � T.ToolTip � � control � � � x � � � expression for x � � � y � � � expression for y
� � implicitWidth � � � expression for implicitWidth � � � implicitHeight � � � expression for implicitHeight � � margins � � padding � � � closePolicy � � � expression for closePolicy � � � Text � � � text � � � expression for text � � � font � � � expression for font � � � wrapMode � � � expression for wrapMode � � � color � � � expression for color � � � contentItem � � Rectangle � � � border
� � background � � � parent � � � width � � � Math � � � max � � � implicitBackgroundWidth � � leftInset
� � rightInset � � contentWidth � � � leftPadding � � rightPadding � � � implicitBackgroundHeight � � � topInset � � � bottomInset
� � contentHeight
� � topPadding
� � bottomPadding � � � Popup
� � CloseOnEscape � � � CloseOnPressOutsideParent � � � CloseOnReleaseOutsideParent � � palette � � � toolTipBase � � � dark � � � Wrap � � � toolTipText : � � qrc:/qt-project.org/imports/QtQuick/Controls.2/ToolTip.qml � � � p � � � %�� � � &�� � � '�� � � � (�� ư ɨ ˰ � � T T T T T T � Ĭ *�+Pİ İ ��� APAĐ� ��� :P:Ġ� � 8P8Ġ� � 6P6à� � 5P5à� � 2P2Ő � 0P0ŀ
� .P.�� -P-� � T T T T T �T ´ :Ġ ´ ´ �