|
from argparse import ArgumentParser |
|
from os import path, walk |
|
from typing import Iterable |
|
import sys |
|
|
|
from chardet import detect |
|
from mutagen.id3 import Encoding, ID3, TextFrame |
|
|
|
# ID3 text-frame IDs that this script inspects and, when needed, re-encodes.
# Per the ID3v2 frame list: album, composer, title, lead artist, band.
TARGET_FIELDS = {
    'TALB',
    'TCOM',
    'TIT2',
    'TPE1',
    'TPE2',
}
|
|
|
|
|
def collect_files(root_dir: str) -> Iterable[str]:
    """Yield the path of every MP3 file found anywhere under *root_dir*.

    The tree is traversed with ``os.walk``; each yielded path is the
    containing directory joined with the file name. A file counts as MP3
    when its name ends in ``.mp3`` in any letter case, so ``SONG.MP3`` is
    picked up as well. A *root_dir* that does not exist yields nothing.
    """
    for parent, _, names in walk(root_dir):
        for name in names:
            # Case-insensitive match: extensions are often upper- or
            # mixed-case, and those files must not be skipped.
            if name.lower().endswith('.mp3'):
                yield path.join(parent, name)
|
|
|
|
|
def is_fake_ascii(text: str) -> bool:
    """Report whether *text* looks like something other than plain ASCII.

    Returns ``True`` when *text* contains a character that cannot be encoded
    as ASCII, or when chardet's detection confidence for the ASCII-encoded
    bytes is anything other than exactly 1.0. An empty string is never
    reported: it has no bytes to analyse.
    """
    if not text:
        # Nothing to detect; treat an empty value as clean instead of asking
        # chardet to score zero bytes.
        return False

    try:
        raw = text.encode('ascii')
    except UnicodeEncodeError:
        # At least one character lies outside the ASCII range.
        return True

    return detect(raw)['confidence'] != 1.0
|
|
|
|
|
def is_suspect(meta: ID3) -> bool:
    """Tell whether any targeted frame in *meta* looks mis-encoded.

    Only the frame IDs listed in ``TARGET_FIELDS`` that are present in
    *meta* are examined. A frame is a hit when its declared encoding is
    ``Encoding.LATIN1`` and ``is_fake_ascii`` flags its first text value.
    Evaluation stops at the first hit.
    """
    # Lazy on purpose: frames are fetched one at a time so that the scan
    # stops as soon as ``any`` sees a match.
    present = (meta[frame_id] for frame_id in TARGET_FIELDS if frame_id in meta)
    return any(
        frame.encoding == Encoding.LATIN1 and is_fake_ascii(frame.text[0])
        for frame in present
    )
|
|
|
|
|
def fix_frame_encoding(frame: TextFrame) -> TextFrame:
    """Re-decode a Latin-1 text frame as GBK and mark it as UTF-16.

    A frame whose encoding is not ``Encoding.LATIN1`` is returned untouched.
    Otherwise every string in ``frame.text`` is encoded back to Latin-1
    bytes and those bytes are decoded as GBK. The conversion is
    all-or-nothing: if any value fails the round trip, the frame is left
    exactly as it was, so text that is not GBK is never corrupted and one
    odd frame cannot abort a whole run.

    The frame is modified in place and is also returned, so callers holding
    the same frame object see the change.
    """
    if frame.encoding != Encoding.LATIN1:
        return frame

    try:
        # Convert all values, not just the first, so multi-valued frames
        # keep every entry.
        recoded = [value.encode('latin1').decode('gbk') for value in frame.text]
    except UnicodeError:
        # The bytes are not valid GBK (for example genuine Latin-1 text such
        # as an accented name); leave the frame alone.
        return frame

    frame.text = recoded
    frame.encoding = Encoding.UTF16
    return frame
|
|
|
|
|
def main(args):
    """Scan ``args.dir`` for MP3 files and repair suspect ID3 text frames.

    For every file returned by ``collect_files`` the ID3 tag is loaded. If
    ``is_suspect`` flags the tag, each frame listed in ``TARGET_FIELDS`` that
    is present is passed through ``fix_frame_encoding``. The tag is saved
    only when the string form of at least one frame changed, and a single
    line of ``KEY: old -> new`` entries joined by ``'; '`` is printed for
    each saved file.

    Files whose tag cannot be loaded are reported on stderr and skipped so
    that one bad file does not stop the rest of the batch.

    Args:
        args: Parsed command-line namespace; only ``args.dir`` is read.
    """
    # Local import keeps this function self-contained; mutagen is already a
    # dependency of this file.
    from mutagen import MutagenError

    for f in collect_files(path.abspath(args.dir)):
        try:
            meta = ID3(f)
        except MutagenError as exc:
            # Raised for files without an ID3 header as well as for files
            # that cannot be read or parsed.
            print(f'{f}: {exc}', file=sys.stderr)
            continue

        if not is_suspect(meta):
            continue

        fields = []
        for key in TARGET_FIELDS:
            frame = meta.get(key)
            if not frame:
                continue

            # Capture the text before the in-place conversion so the two
            # renderings can be compared and reported.
            orig = str(frame)
            fixed = str(fix_frame_encoding(frame))
            if fixed != orig:
                fields.append(f'{key}: {orig} -> {fixed}')

        if fields:
            meta.save()
            print('; '.join(fields))
|
|
|
|
|
if __name__ == '__main__':
    # One required positional argument, ``dir``, which main() reads as the
    # root directory to scan.
    cli = ArgumentParser()
    cli.add_argument('dir')
    # parse_args() reads sys.argv[1:] when called without arguments.
    main(cli.parse_args())