Skip to content

Instantly share code, notes, and snippets.

@iwinux
Last active February 28, 2019 02:42
Show Gist options
  • Save iwinux/8329bf6c9e62f5e8233c082d855be050 to your computer and use it in GitHub Desktop.
Save iwinux/8329bf6c9e62f5e8233c082d855be050 to your computer and use it in GitHub Desktop.
# Brute-force ID3 encoding fix: re-decode tag text labelled Latin-1 as GBK and store it as UTF-16.
from argparse import ArgumentParser
from os import path, walk
from typing import Iterable
import sys
from chardet import detect
from mutagen.id3 import Encoding, ID3, TextFrame
# ID3v2 text-frame IDs that this script inspects and repairs.  Per the ID3v2
# frame list: TALB = album, TCOM = composer, TIT2 = title, TPE1 = lead
# artist, TPE2 = band / album artist.
TARGET_FIELDS = {'TALB', 'TCOM', 'TIT2', 'TPE1', 'TPE2'}
def collect_files(root_dir: str) -> Iterable[str]:
    """Yield the path of every MP3 file found under *root_dir*.

    The directory tree is walked recursively with ``os.walk``.  The extension
    check is case-insensitive, so ``SONG.MP3`` is picked up as well as
    ``song.mp3``.

    Args:
        root_dir: Directory to search.

    Yields:
        ``path.join(parent, name)`` for each file whose name ends in
        ``.mp3`` (in any letter case).
    """
    for parent, _, names in walk(root_dir):
        for name in names:
            # Lower-case only for the comparison; the yielded path keeps the
            # file's real spelling.
            if name.lower().endswith('.mp3'):
                yield path.join(parent, name)
def is_fake_ascii(text: str) -> bool:
    """Report whether *text* fails to look like plain ASCII.

    Returns ``True`` when *text* cannot be encoded as ASCII, or when the
    ``confidence`` reported by ``detect`` for the ASCII-encoded bytes is
    anything other than ``1.0``.  Returns ``False`` otherwise.
    """
    try:
        raw = text.encode('ascii')
        confidence = detect(raw)['confidence']
    except UnicodeEncodeError:
        # Characters outside ASCII: the text is certainly not plain ASCII.
        return True
    return confidence != 1.0
def is_suspect(meta: ID3) -> bool:
    """Report whether any target frame in *meta* looks mis-encoded.

    A frame counts as suspicious when its ID is in ``TARGET_FIELDS``, its
    declared encoding is Latin-1, and its first text value is not plain ASCII
    according to ``is_fake_ascii``.

    Args:
        meta: The loaded ID3 tag to inspect.

    Returns:
        ``True`` as soon as one suspicious frame is found, else ``False``.
    """
    for key in TARGET_FIELDS:
        if key not in meta:
            continue
        frame = meta[key]
        if frame.encoding != Encoding.LATIN1:
            continue
        # A frame may hold an empty text list; indexing [0] would raise
        # IndexError, and there is nothing to examine anyway.
        if frame.text and is_fake_ascii(frame.text[0]):
            return True
    return False
def fix_frame_encoding(frame: TextFrame) -> TextFrame:
    """Re-decode a Latin-1 text frame as GBK and mark it as UTF-16.

    Each text value is encoded back to its Latin-1 bytes and those bytes are
    decoded as GBK.  All values of the frame are converted, not only the
    first one.

    The frame is returned unchanged when:
      * its encoding is not Latin-1,
      * it has no text values, or
      * any value's bytes are not valid GBK (for example genuine Latin-1
        text such as "Café"); in that case nothing is modified.

    Args:
        frame: The text frame to repair.  It is modified in place.

    Returns:
        The same *frame* object that was passed in.
    """
    if frame.encoding != Encoding.LATIN1 or not frame.text:
        return frame
    try:
        # Build the full replacement list first so a failure on a later value
        # cannot leave the frame half-converted.
        repaired = [value.encode('latin1').decode('gbk') for value in frame.text]
    except UnicodeError:
        # Not GBK after all: keep the original text and encoding.
        return frame
    # Mutation is deliberately in place: main() discards the return value's
    # identity and relies on the tag's own frame object being updated before
    # saving.
    frame.text = repaired
    frame.encoding = Encoding.UTF16
    return frame
def main(args):
    """Repair mis-encoded ID3 text frames for every MP3 under ``args.dir``.

    For each file returned by ``collect_files``, the ID3 tag is loaded and, if
    ``is_suspect`` flags it, every frame listed in ``TARGET_FIELDS`` is passed
    through ``fix_frame_encoding``.  When at least one frame's string form
    changed, the tag is saved and the changes are printed as
    ``KEY: old -> new`` entries joined by ``'; '``.

    Files whose tag cannot be loaded (for example an MP3 without an ID3
    header) are reported on stderr and skipped instead of aborting the run.

    Args:
        args: Parsed command-line namespace; only ``args.dir`` is read.
    """
    # Local import: mutagen is already a dependency of this script, and this
    # keeps the exception type in scope without touching file-level imports.
    from mutagen import MutagenError

    for f in collect_files(path.abspath(args.dir)):
        try:
            meta = ID3(f)
        except MutagenError as exc:
            print(f'skipping {f}: {exc}', file=sys.stderr)
            continue

        if not is_suspect(meta):
            continue

        fields = []
        for key in TARGET_FIELDS:
            frame = meta.get(key)
            if not frame:
                continue
            # Capture the text before fixing: the frame is mutated in place.
            orig = str(frame)
            fixed = str(fix_frame_encoding(frame))
            if fixed != orig:
                fields.append(f'{key}: {orig} -> {fixed}')

        if fields:
            meta.save()
            print('; '.join(fields))
if __name__ == '__main__':
    # Script entry point: takes one positional argument, the directory to
    # scan, and hands the parsed namespace to main().
    cli = ArgumentParser()
    cli.add_argument('dir')
    namespace = cli.parse_args(sys.argv[1:])
    main(namespace)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment