Last active
June 10, 2023 18:46
-
-
Save bbbradsmith/6814196ea48ff38cfe4ff17b64f4d82c to your computer and use it in GitHub Desktop.
Plex subtitle file fixer and cleanup
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
#
# This script attempts to find suitable subtitles in video collections,
# and will copy the best candidate into the same folder as the video,
# with the same filename as the video with the subtitle's extension.
#
# If a video already has a subtitle file in this place, it will not overwrite it.
#
# It can also delete directories and files as automatic cleanup.
#
# Subtitle candidates are currently ranked by these criteria, in order of priority:
# 1. Contains the video's base filename in its path or filename.
# 2. Contains a substring that may indicate a preferred language (e.g. "en").
# 3. Contains the most data. (Prefer more descriptive text.)
#
# This will operate on the current folder.
#
import os | |
import shutil | |
# --- Run options -----------------------------------------------------------
PREVIEW = False  # True to prevent any changes to files.
CANDIDATES = False  # True to show all ranked subtitle candidates.
# When several videos share a folder, only accept subtitles whose path
# contains the video's base filename (case-insensitive substring test).
STRICT_SIBLINGS = True
CLEANUP = True  # Remove some unneeded files after fixing subtitles.

# --- Matching tables -------------------------------------------------------
# These lists must be lowercase: they are compared against lowercased names.
VIDEO_EXT = (".mp4", ".mkv", ".avi", ".flv", ".mpeg", ".mov", ".m4v")  # video file types
SUB_EXT = (".srt", ".smi", ".ssa", ".ass", ".vtt")  # subtitle file types
# Rank language preference by contained substrings, later=preferred.
LANG_RANK = ["en", "eng", "english"]
# Remove files whose (lowercased) name ends with one of these suffixes.
# The entries are lowercased here so they can be written in their usual case.
CLEAN_EXT = tuple(x.lower() for x in (
    ".exe", ".nfo",
    "rarbg.txt",
    "source.txt",
    "www.YTS.MX.jpg",
    "www.YTS.LT.jpg",
    "www.YTS.RE.jpg",
    "www.YTS.TO.jpg",
    "YTSProxies.com.txt",
    "YIFYStatus.com.txt",
    "WWW.YIFY-TORRENTS.COM.jpg",
    "[TGx]Downloaded from torrentgalaxy.to .txt",
    "NEW upcoming releases by Xclusive.txt",
))
# Remove directories with these exact (lowercased) names.
# This must stay a tuple: note the trailing comma in the one-element example
# below. A bare ("subs") is just a string, and `name in "subs"` would then be
# a substring test that also matches directories named "s", "ub", ...
CLEAN_DIRS = ()
#CLEAN_DIRS = ("subs",) # This could remove "Subs" folders, but it seems better to keep them just in case.
def _subfix_rank(sub_path, video_base_lower):
    """Return the (base, language, size) ranking tuple for one subtitle path.

    Tuples compare element by element, so a higher earlier element always
    wins over any later element.
    """
    sub_lower = sub_path.lower()
    # first rank by containing the video's base name anywhere in the path
    rank_base = 1 if video_base_lower in sub_lower else 0
    # next rank by containing a language string; no break, so the last
    # matching entry of LANG_RANK (the most preferred one) decides the rank
    rank_lang = 0
    for (i, lang) in enumerate(LANG_RANK):
        if lang in sub_lower:
            rank_lang = i + 1
    # next rank by filesize; an unreadable file (e.g. a broken symlink)
    # ranks as empty instead of aborting the whole run
    try:
        rank_fs = os.path.getsize(sub_path)
    except OSError:
        rank_fs = 0
    return (rank_base, rank_lang, rank_fs)


def subfix(path,strict_siblings=False,preview=False,candidates=False):
    """Copy the best-ranked subtitle next to every video found under *path*.

    For each file below *path* whose name ends with one of VIDEO_EXT, all
    files ending with one of SUB_EXT in the video's folder or below are
    ranked, and the best one is copied to "<video folder>/<video base name>
    <subtitle extension>". An existing file at that location is never
    overwritten.

    Parameters:
        path: directory to scan recursively.
        strict_siblings: if true, a video that shares its folder with other
            videos only accepts subtitles whose path contains the video's
            base filename (case-insensitive).
        preview: if true, report what would happen but copy nothing.
        candidates: if true, print every ranked candidate for each video.

    Output is printed per video: "---->" for a copy (or a copy that preview
    mode would make), "SKIP>" when the target already exists, "ERR!>" when
    the copy failed, and "NONE." when no candidate was found.
    """
    # gather video list, counting the video files in each folder as we go so
    # the sibling check needs no extra directory listing per video
    videos = []
    folder_counts = {}
    for (root, dirs, files) in os.walk(path):
        for f in files:
            if f.lower().endswith(VIDEO_EXT):
                v = os.path.join(root, f)
                videos.append(v)
                folder = os.path.split(v)[0]
                folder_counts[folder] = folder_counts.get(folder, 0) + 1
    # for each video select best subtitle
    for v in videos:
        print("Video: %s" % v)
        (vroot, vfile) = os.path.split(v)
        vbase = os.path.splitext(vfile)[0]
        vbasel = vbase.lower()
        sibling = bool(strict_siblings) and folder_counts[vroot] > 1
        # gather all subs in the video folder or below
        subs = []
        for (sroot, sdirs, sfiles) in os.walk(vroot):
            for f in sfiles:
                if f.lower().endswith(SUB_EXT):
                    p = os.path.join(sroot, f)
                    # siblings require the video's base name in the path
                    if (not sibling) or (vbasel in p.lower()):
                        subs.append(p)
        if not subs:
            print(" NONE.")
            continue
        # rank the subs and choose the best; max() keeps the first of equal
        # ranks, i.e. the earliest candidate found by the walk
        subranks = [_subfix_rank(s, vbasel) for s in subs]
        best_sub_index = max(range(len(subs)), key=subranks.__getitem__)
        # list candidates
        if candidates:
            for (i, (s, r)) in enumerate(zip(subs, subranks)):
                descrip = " best" if (i == best_sub_index) else ""
                print(" ??? > %s (%d,%d,%d)%s" % ((s,) + r + (descrip,)))
        # apply the best candidate
        s = subs[best_sub_index]
        sext = os.path.splitext(s)[1]
        of = os.path.join(vroot, vbase + sext)
        if os.path.exists(of):
            # checked in preview mode too, so the preview matches a real run
            descrip = "SKIP>"
        elif preview:
            descrip = "---->"
        else:
            try:
                shutil.copyfile(s, of)
                descrip = "---->"
            except OSError:
                # only filesystem errors are reported; Ctrl-C still interrupts
                descrip = "ERR!>"
        print(" %s %s" % (descrip, s))
def cleanup(path,preview=False):
    """Remove unwanted directories and files found under *path*.

    A directory is selected when its lowercased name is in CLEAN_DIRS; a file
    is selected when its lowercased name ends with one of CLEAN_EXT. Every
    selected path is printed. Unless *preview* is true, directories are then
    removed with their contents and files are deleted. A removal that fails
    prints an error line and the cleanup continues.

    Parameters:
        path: directory to scan recursively.
        preview: if true, only list what would be removed.
    """
    clean_files = []
    clean_dirs = []
    for (root, dirs, files) in os.walk(path):
        kept_dirs = []
        for d in dirs:
            if d.lower() in CLEAN_DIRS:
                clean_dirs.append(os.path.join(root, d))
            else:
                kept_dirs.append(d)
        # Do not descend into directories that are going to be removed as a
        # whole: their contents would be queued a second time and then fail
        # to delete once the parent is gone.
        dirs[:] = kept_dirs
        for f in files:
            if f.lower().endswith(CLEAN_EXT):
                clean_files.append(os.path.join(root, f))
    if clean_dirs:
        print("Cleanup directories:")
        for d in clean_dirs:
            print(" > %s" % d)
            if not preview:
                try:
                    shutil.rmtree(d)
                except OSError:
                    print(" >> ERROR!")
    if clean_files:
        print("Cleanup files:")
        for f in clean_files:
            print(" > %s" % f)
            if not preview:
                try:
                    os.remove(f)
                except OSError:
                    print(" >> ERROR!")
    if not clean_dirs and not clean_files:
        print("Cleanup found nothing to remove.")
# Script entry point: fix subtitles in the current folder, then optionally
# clean up. Guarded so that importing this file does not touch any files.
if __name__ == "__main__":
    subfix(".",STRICT_SIBLINGS,PREVIEW,CANDIDATES)
    # Honour the CLEANUP switch; it was previously defined but never checked.
    if CLEANUP:
        cleanup(".",PREVIEW)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment