bbbradsmith · June 10, 2023 18:46
diff --git a/subfix.py b/subfix.py
 #!/usr/bin/env python3

 #
 # This script attempts to find suitable subtitles in video collections,
 # and will copy the best candidate into the same folder as the video,
 # with the same filename as the video with the subtitle's extension.
 #
 # If a video already has a subtitle file in this place, it will not overwrite it.
 #
 # It can also delete directories and files as automatic cleanup.
 #
 # Subtitle candidates are currently ranked by these criteria, in order of priority:
 # 1. Contains the video's base filename in its path or filename.
 # 2. Contains a substring that may indicate a preferred language (e.g. "en").
 # 3. Contains the most data. (Prefer more descriptive text.)
 #
 # This will operate on the current folder.
 #

 import os
 import shutil

 # Behaviour switches.
 PREVIEW = False  # Set True for a dry run that changes no files.
 CANDIDATES = False  # Set True to print every ranked subtitle candidate.
 # When several videos share a folder, require the subtitle to match the video's filename.
 STRICT_SIBLINGS = True
 CLEANUP = True  # Remove some unneeded files after fixing subtitles.

 # Every entry below must be lowercase: file names are lowercased before
 # they are compared against these lists.
 VIDEO_EXT = (  # video file types
     ".mp4", ".mkv", ".avi", ".flv", ".mpeg", ".mov", ".m4v",
 )
 SUB_EXT = (  # subtitle file types
     ".srt", ".smi", ".ssa", ".ass", ".vtt",
 )
 # Substrings hinting at the preferred language; entries further down the
 # list are preferred over earlier ones.
 LANG_RANK = [
     "en",
     "eng",
     "english",
 ]
 # Files whose name ends with one of these suffixes are removed by cleanup.
 # The suffixes are lowercased here so they can be written in their usual case.
 CLEAN_EXT = tuple(map(str.lower, (
     ".exe",
     ".nfo",
     "rarbg.txt",
     "source.txt",
     "www.YTS.MX.jpg",
     "www.YTS.LT.jpg",
     "www.YTS.RE.jpg",
     "www.YTS.TO.jpg",
     "YTSProxies.com.txt",
     "YIFYStatus.com.txt",
     "WWW.YIFY-TORRENTS.COM.jpg",
     "[TGx]Downloaded from torrentgalaxy.to .txt",
     "NEW upcoming releases by Xclusive.txt",
 )))
 # Directories with exactly these (lowercase) names are removed by cleanup.
 CLEAN_DIRS = tuple()
 # CLEAN_DIRS = ("subs",)  # This could remove "Subs" folders, but it seems better to keep them just in case.
 # (Keep the trailing comma: ("subs") without it is a plain string, not a tuple.)


 def _subtitle_rank(sub_path, video_base_lower):
     """Return the (name, language, size) rank tuple for one subtitle candidate.

     Tuples compare element by element, so a candidate whose path contains
     the video's base name beats any language hint, and a language hint
     beats file size.
     """
     sub_lower = sub_path.lower()
     rank_base = 1 if video_base_lower in sub_lower else 0
     # Later LANG_RANK entries are preferred, so the highest matching position wins.
     rank_lang = max(
         (pos + 1 for pos, tag in enumerate(LANG_RANK) if tag in sub_lower),
         default=0,
     )
     try:
         rank_fs = os.path.getsize(sub_path)
     except OSError:
         # Unreadable candidate (e.g. a dangling symlink): rank it as empty
         # rather than aborting the whole run.
         rank_fs = 0
     return (rank_base, rank_lang, rank_fs)


 def subfix(path, strict_siblings=False, preview=False, candidates=False):
     """Copy the best-ranked subtitle next to every video found under *path*.

     A video is any file whose lowercased name ends with an entry of
     VIDEO_EXT. Its candidates are all files ending with an entry of SUB_EXT
     in the video's folder or below. The best candidate is copied into the
     video's folder as ``<video base name><subtitle extension>``; an existing
     file at that location is never overwritten.

     One status line is printed per video: ``---->`` copied (or would be
     copied, in preview mode), ``SKIP>`` target already exists, ``ERR!>``
     the copy failed, ``NONE.`` no candidate was found.

     Args:
         path: Root of the directory tree to process.
         strict_siblings: If true, a video that shares its folder with other
             videos only accepts candidates whose path contains the video's
             base filename (case-insensitive substring test).
         preview: If true, report what would happen without copying anything.
         candidates: If true, also print every candidate with its rank.
     """
     # Gather the video list, counting videos per folder once for the sibling check.
     videos = []
     videos_in_dir = {}
     for (root, _dirs, files) in os.walk(path):
         for f in files:
             if f.lower().endswith(VIDEO_EXT):
                 v = os.path.join(root, f)
                 videos.append(v)
                 vdir = os.path.dirname(v)
                 videos_in_dir[vdir] = videos_in_dir.get(vdir, 0) + 1

     # For each video select the best subtitle.
     for v in videos:
         print("Video: %s" % v)
         (vroot, vfile) = os.path.split(v)
         vbase = os.path.splitext(vfile)[0]
         vbasel = vbase.lower()
         sibling = strict_siblings and videos_in_dir[vroot] > 1

         # Gather all subs in the video folder or below.
         subs = []
         for (sroot, _sdirs, sfiles) in os.walk(vroot):
             for f in sfiles:
                 if f.lower().endswith(SUB_EXT):
                     p = os.path.join(sroot, f)
                     # Siblings must name the video somewhere in their path.
                     if (not sibling) or (vbasel in p.lower()):
                         subs.append(p)
         if not subs:
             print(" NONE.")
             continue

         # Rank the subs; on a tie the first candidate found wins.
         subranks = [_subtitle_rank(s, vbasel) for s in subs]
         best_sub_index = max(range(len(subs)), key=lambda i: subranks[i])

         if candidates:
             for i, (s, r) in enumerate(zip(subs, subranks)):
                 descrip = " best" if i == best_sub_index else ""
                 print(" ??? > %s (%d,%d,%d)%s" % ((s,) + r + (descrip,)))

         # Apply the winner. The existence check runs in preview mode too,
         # so a dry run reports SKIP> exactly where a real run would skip.
         s = subs[best_sub_index]
         of = os.path.join(vroot, vbase + os.path.splitext(s)[1])
         if os.path.exists(of):
             descrip = "SKIP>"
         elif preview:
             descrip = "---->"
         else:
             try:
                 shutil.copyfile(s, of)
                 descrip = "---->"
             except OSError:
                 # Report the failed copy and carry on with the next video.
                 descrip = "ERR!>"
         print(" %s %s" % (descrip, s))


 def cleanup(path, preview=False):
     """Delete leftover junk files and directories found under *path*.

     Directories whose lowercased name is listed in CLEAN_DIRS are removed
     with all their contents; files whose lowercased name ends with an entry
     of CLEAN_EXT are removed individually. Each item is printed before it
     is removed, and a failed removal prints `` >> ERROR!`` and continues.

     Args:
         path: Root of the directory tree to clean.
         preview: If true, only list what would be removed.
     """
     # A bare string such as ("subs") (no trailing comma) would turn the
     # membership test below into a substring match; treat it as one name.
     dir_names = (CLEAN_DIRS,) if isinstance(CLEAN_DIRS, str) else CLEAN_DIRS

     clean_files = []
     clean_dirs = []
     for (root, dirs, files) in os.walk(path):
         kept = []
         for d in dirs:
             if d.lower() in dir_names:
                 clean_dirs.append(os.path.join(root, d))
             else:
                 kept.append(d)
         # Do not descend into directories that will be removed wholesale:
         # their contents would be queued again and then fail to delete.
         dirs[:] = kept
         for f in files:
             if f.lower().endswith(CLEAN_EXT):
                 clean_files.append(os.path.join(root, f))

     if clean_dirs:
         print("Cleanup directories:")
         for d in clean_dirs:
             print(" > %s" % d)
             if not preview:
                 try:
                     shutil.rmtree(d)
                 except OSError:
                     print(" >> ERROR!")
     if clean_files:
         print("Cleanup files:")
         for f in clean_files:
             print(" > %s" % f)
             if not preview:
                 try:
                     os.remove(f)
                 except OSError:
                     print(" >> ERROR!")
     if not clean_dirs and not clean_files:
         print("Cleanup found nothing to remove.")


 # Fix subtitles in the current folder, then tidy up.
 subfix(".",STRICT_SIBLINGS,PREVIEW,CANDIDATES)
 if CLEANUP: # cleanup is optional: skip it when the CLEANUP switch is off
     cleanup(".",PREVIEW)
	#!/usr/bin/env python3

	#
	# This script attempts to find suitable subtitles in video collections,
	# and will copy the best candidate into the same folder as the video,
	# with the same filename as the video with the subtitle's extension.
	#
	# If a video already has a subtitle file in this place, it will not overwrite it.
	#
	# It can also delete directories and files as automatic cleanup.
	#
	# Subtitle candidates are currently ranked by these criteria, in order of priority:
	# 1. Contains the video's base filename in its path or filename.
	# 2. Contains a substring that may indicate a preferred language (e.g. "en").
	# 3. Contains the most data. (Prefer more descriptive text.)
	#
	# This will operate on the current folder.
	#

	import os
	import shutil

	# Behaviour switches.
	PREVIEW = False  # Set True for a dry run that changes no files.
	CANDIDATES = False  # Set True to print every ranked subtitle candidate.
	# When several videos share a folder, require the subtitle to match the video's filename.
	STRICT_SIBLINGS = True
	CLEANUP = True  # Remove some unneeded files after fixing subtitles.

	# Every entry below must be lowercase: file names are lowercased before
	# they are compared against these lists.
	VIDEO_EXT = (  # video file types
	    ".mp4", ".mkv", ".avi", ".flv", ".mpeg", ".mov", ".m4v",
	)
	SUB_EXT = (  # subtitle file types
	    ".srt", ".smi", ".ssa", ".ass", ".vtt",
	)
	# Substrings hinting at the preferred language; entries further down the
	# list are preferred over earlier ones.
	LANG_RANK = [
	    "en",
	    "eng",
	    "english",
	]
	# Files whose name ends with one of these suffixes are removed by cleanup.
	# The suffixes are lowercased here so they can be written in their usual case.
	CLEAN_EXT = tuple(map(str.lower, (
	    ".exe",
	    ".nfo",
	    "rarbg.txt",
	    "source.txt",
	    "www.YTS.MX.jpg",
	    "www.YTS.LT.jpg",
	    "www.YTS.RE.jpg",
	    "www.YTS.TO.jpg",
	    "YTSProxies.com.txt",
	    "YIFYStatus.com.txt",
	    "WWW.YIFY-TORRENTS.COM.jpg",
	    "[TGx]Downloaded from torrentgalaxy.to .txt",
	    "NEW upcoming releases by Xclusive.txt",
	)))
	# Directories with exactly these (lowercase) names are removed by cleanup.
	CLEAN_DIRS = tuple()
	# CLEAN_DIRS = ("subs",)  # This could remove "Subs" folders, but it seems better to keep them just in case.
	# (Keep the trailing comma: ("subs") without it is a plain string, not a tuple.)


	def _subtitle_rank(sub_path, video_base_lower):
	    """Return the (name, language, size) rank tuple for one subtitle candidate.

	    Tuples compare element by element, so a candidate whose path contains
	    the video's base name beats any language hint, and a language hint
	    beats file size.
	    """
	    sub_lower = sub_path.lower()
	    rank_base = 1 if video_base_lower in sub_lower else 0
	    # Later LANG_RANK entries are preferred, so the highest matching position wins.
	    rank_lang = max(
	        (pos + 1 for pos, tag in enumerate(LANG_RANK) if tag in sub_lower),
	        default=0,
	    )
	    try:
	        rank_fs = os.path.getsize(sub_path)
	    except OSError:
	        # Unreadable candidate (e.g. a dangling symlink): rank it as empty
	        # rather than aborting the whole run.
	        rank_fs = 0
	    return (rank_base, rank_lang, rank_fs)


	def subfix(path, strict_siblings=False, preview=False, candidates=False):
	    """Copy the best-ranked subtitle next to every video found under *path*.

	    A video is any file whose lowercased name ends with an entry of
	    VIDEO_EXT. Its candidates are all files ending with an entry of SUB_EXT
	    in the video's folder or below. The best candidate is copied into the
	    video's folder as ``<video base name><subtitle extension>``; an existing
	    file at that location is never overwritten.

	    One status line is printed per video: ``---->`` copied (or would be
	    copied, in preview mode), ``SKIP>`` target already exists, ``ERR!>``
	    the copy failed, ``NONE.`` no candidate was found.

	    Args:
	        path: Root of the directory tree to process.
	        strict_siblings: If true, a video that shares its folder with other
	            videos only accepts candidates whose path contains the video's
	            base filename (case-insensitive substring test).
	        preview: If true, report what would happen without copying anything.
	        candidates: If true, also print every candidate with its rank.
	    """
	    # Gather the video list, counting videos per folder once for the sibling check.
	    videos = []
	    videos_in_dir = {}
	    for (root, _dirs, files) in os.walk(path):
	        for f in files:
	            if f.lower().endswith(VIDEO_EXT):
	                v = os.path.join(root, f)
	                videos.append(v)
	                vdir = os.path.dirname(v)
	                videos_in_dir[vdir] = videos_in_dir.get(vdir, 0) + 1

	    # For each video select the best subtitle.
	    for v in videos:
	        print("Video: %s" % v)
	        (vroot, vfile) = os.path.split(v)
	        vbase = os.path.splitext(vfile)[0]
	        vbasel = vbase.lower()
	        sibling = strict_siblings and videos_in_dir[vroot] > 1

	        # Gather all subs in the video folder or below.
	        subs = []
	        for (sroot, _sdirs, sfiles) in os.walk(vroot):
	            for f in sfiles:
	                if f.lower().endswith(SUB_EXT):
	                    p = os.path.join(sroot, f)
	                    # Siblings must name the video somewhere in their path.
	                    if (not sibling) or (vbasel in p.lower()):
	                        subs.append(p)
	        if not subs:
	            print(" NONE.")
	            continue

	        # Rank the subs; on a tie the first candidate found wins.
	        subranks = [_subtitle_rank(s, vbasel) for s in subs]
	        best_sub_index = max(range(len(subs)), key=lambda i: subranks[i])

	        if candidates:
	            for i, (s, r) in enumerate(zip(subs, subranks)):
	                descrip = " best" if i == best_sub_index else ""
	                print(" ??? > %s (%d,%d,%d)%s" % ((s,) + r + (descrip,)))

	        # Apply the winner. The existence check runs in preview mode too,
	        # so a dry run reports SKIP> exactly where a real run would skip.
	        s = subs[best_sub_index]
	        of = os.path.join(vroot, vbase + os.path.splitext(s)[1])
	        if os.path.exists(of):
	            descrip = "SKIP>"
	        elif preview:
	            descrip = "---->"
	        else:
	            try:
	                shutil.copyfile(s, of)
	                descrip = "---->"
	            except OSError:
	                # Report the failed copy and carry on with the next video.
	                descrip = "ERR!>"
	        print(" %s %s" % (descrip, s))


	def cleanup(path, preview=False):
	    """Delete leftover junk files and directories found under *path*.

	    Directories whose lowercased name is listed in CLEAN_DIRS are removed
	    with all their contents; files whose lowercased name ends with an entry
	    of CLEAN_EXT are removed individually. Each item is printed before it
	    is removed, and a failed removal prints `` >> ERROR!`` and continues.

	    Args:
	        path: Root of the directory tree to clean.
	        preview: If true, only list what would be removed.
	    """
	    # A bare string such as ("subs") (no trailing comma) would turn the
	    # membership test below into a substring match; treat it as one name.
	    dir_names = (CLEAN_DIRS,) if isinstance(CLEAN_DIRS, str) else CLEAN_DIRS

	    clean_files = []
	    clean_dirs = []
	    for (root, dirs, files) in os.walk(path):
	        kept = []
	        for d in dirs:
	            if d.lower() in dir_names:
	                clean_dirs.append(os.path.join(root, d))
	            else:
	                kept.append(d)
	        # Do not descend into directories that will be removed wholesale:
	        # their contents would be queued again and then fail to delete.
	        dirs[:] = kept
	        for f in files:
	            if f.lower().endswith(CLEAN_EXT):
	                clean_files.append(os.path.join(root, f))

	    if clean_dirs:
	        print("Cleanup directories:")
	        for d in clean_dirs:
	            print(" > %s" % d)
	            if not preview:
	                try:
	                    shutil.rmtree(d)
	                except OSError:
	                    print(" >> ERROR!")
	    if clean_files:
	        print("Cleanup files:")
	        for f in clean_files:
	            print(" > %s" % f)
	            if not preview:
	                try:
	                    os.remove(f)
	                except OSError:
	                    print(" >> ERROR!")
	    if not clean_dirs and not clean_files:
	        print("Cleanup found nothing to remove.")


	# Fix subtitles in the current folder, then tidy up.
	subfix(".",STRICT_SIBLINGS,PREVIEW,CANDIDATES)
	if CLEANUP: # cleanup is optional: skip it when the CLEANUP switch is off
	    cleanup(".",PREVIEW)