Last active
July 7, 2023 08:57
-
-
Save CraftyCanine/3a1a86b2837092038edab2ea33182460 to your computer and use it in GitHub Desktop.
episodesplit.py - Python script that combines functionality from ffmpeg and mkvmerge to identify and split multi-episode TV shows into separate files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
#----------------------------------------------------------
#
# Episode Split
# CraftyCanine
#
# Description
#
# This script simplifies the process of splitting
# multi-episode files of 15 min TV shows into 1 episode
# per file.
#
#
# Requirements
#
# ffmpeg, mkvmerge
#
#
# Instructions
#
# Since this is a command-line tool, hopefully you know your way around the terminal.
#
# 1) Install ffmpeg, mkvmerge, and Python 2 (a version earlier than Python 3) from your favorite package manager (yum, dpkg, brew, etc.) if you don't already have them.
# 2) Download the file from the gist: get the raw link and copy-paste its contents into a file, or run "wget <link>".
# 3) Change into the directory where the files you want to split are stored.
# 4) Run "python episodesplit.py -i <name of video>" to run with defaults. Depending on the quality of the video, it could take around 5 minutes.
#
# Note: If the tool doesn't want to split or comes back with a bunch of choices, you might want to adjust the thresholds.
# Use "-b <length in seconds>" to adjust the minimum black section length to search for, and
# "-t <ratio of black pixels to non-black pixels>" to adjust the sensitivity for what is considered a black frame.
# Run "python episodesplit.py -h" for more info on the arguments.
#
# --------------------------------------------------------
import sys,os,math | |
import logging | |
from logging.handlers import RotatingFileHandler | |
import argparse | |
from subprocess import Popen, PIPE | |
import re | |
# Example ffmpeg scan:    ffmpeg -i tvshow.mkv -vf "blackdetect=d=1.5" -an -f null -
# Example mkvmerge split: mkvmerge -o "test.mkv" --split "timecodes:00:11:37.321999998" "tvshow.mkv"
# Configurables ------------------------------------------
# Regex patterns for ffmpeg output. Both duration patterns are checked.
# All patterns are raw strings so the regex escapes (\d, \[, \. ...) are not
# treated as (invalid) Python string escapes.
#
# black_pattern captures (black_start, black_end, black_duration) from one
# blackdetect line. The fractional part of each value is optional so that
# whole-number values such as "black_start:0" are matched as well as
# decimals such as "black_start:697.322".
black_pattern = r'\[blackdetect @ .+?\] black_start:(\d+(?:\.\d+)?) black_end:(\d+(?:\.\d+)?) black_duration:(\d+(?:\.\d+)?)\n'
# Each duration pattern captures (hours, minutes, seconds) as strings.
duration_pattern = r'DURATION\s+?: (\d+?):(\d+?):(\d+?\.\d+?)\n'
duration_pattern2 = r'Duration: (\d+?):(\d+?):(\d+?\.\d+?),'
# Regex pattern for old filename. The trailing %s is filled in with the file
# extension before use. Captures, in order: show name, season (Sxx), first
# episode (Exx), second episode (Exx), quality, first episode name, second
# episode name, e.g. "Show.Name.S01E01-E02.720p.First.Ep.+.Second.Ep.mkv".
# Episode names may contain word characters, commas, dots, apostrophes and hyphens.
filename_pattern = r"(.+?)\.?(S\d\d)(E\d\d)-(E\d\d)\.(.+?)\.([\w.,'\-]+?)\.?\+\.([\w.,'\-]+?)\.%s"
# String format pattern for new filename
filename_new_pattern = '{show_name}.{season}{episode}.{quality}.{epname}.{fileExt}'
# Test data for fake data parameter
test_ffmpeg = ''''''  # insert new test ffmpeg output here
test_mkvmerge = ''''''  # insert new test mkvmerge output here
# Globals ------------------------------------------------
# Defaults; each of these is overwritten from the parsed arguments below.
debug = False
tryRename = False
fakeData = False
black_length = 1.0

# Parse Arguments -----------------------------------------
parser = argparse.ArgumentParser(description='Script designed to ease the process of splitting multi-episode files for 15 min TV shows into separate files with 1 episode per file. NOTE: mkvmerge, the tool used to split the files, works using key frames. The split must occur on a key frame, so might not be directly at the episode mark.')
parser.add_argument('-i', '--input',help='Input video file to be split.',required=True,nargs='+')
parser.add_argument('-d','--debug',help='Enable debug mode NOTE: wouldn\'t recommend running debug mode with more than one file...',required=False,action='store_true')
# type=float makes argparse reject non-numeric values with a usage error
# instead of letting the float() conversion below raise a traceback.
parser.add_argument('-n',help='Number of episodes in file (not working yet, don\'t use)',required=False,default=2,type=float)
parser.add_argument('-l','--logpath',help='Path to log file',required=False,default=os.path.splitext(__file__)[0] + '.log')
# store_false: args.norename is True unless -r is given, i.e. it holds "should we rename?"
parser.add_argument('-r','--norename',help='Do not try to rename resulting episode files using original file name (Script will try to rename by default)',required=False,action='store_false')
parser.add_argument('-f','--fakedata',help='Use fake data instead of running on a file (input file still required, but won\'t be used. NOTE: Test data configurable variables (top of source file) must be populated with test output of ffmpeg and mkvmerge.',required=False,action='store_true')
parser.add_argument('-b','--black',help='Length of time in seconds that we are looking for black segments for potential split points (default is 1.0)',required=False,default=1.0,type=float)
parser.add_argument('-t','--threshold',help='Threshold of black to non-black pixels (default is 0.98)',required=False,default=0.98,type=float)
args = parser.parse_args()

# The episode count is used as a divisor later on, so it must be positive.
if not args.n > 0:
    parser.error('-n must be a positive number of episodes')

#Set debug flag (store_true always yields a bool, so no None check is needed)
debug = args.debug
#Set input video (multi-episodes); a list because -i accepts one or more files
inputvid = args.input
#set number of eps
numEps = float(args.n)
#Log file name
name = args.logpath
#Should we rename the resulting files?
tryRename = args.norename
#Should we use fake data?
fakeData = args.fakedata
#Length of black segment to look for, formatted as text for the ffmpeg filter string
black_length = '%.1f' % args.black
#Black pixel ratio threshold, formatted as text for the ffmpeg filter string
black_threshold = '%.2f' % args.threshold
# Logging ---------------------------------------
# Line layout shared by the log file and the console output
fmt = '%(asctime)s [%(levelname)s] %(message)s'
logFormatter = logging.Formatter(fmt)

# Root logger: DEBUG when the debug flag is set, INFO otherwise
rootLogger = logging.getLogger()
rootLogger.setLevel(logging.DEBUG if debug else logging.INFO)

# One rotating log file (path held in `name`) and one stdout stream;
# both use the same formatter and are attached to the root logger.
fileHandler = RotatingFileHandler(name, maxBytes=5e+6, backupCount=10)
consoleHandler = logging.StreamHandler(sys.stdout)
for handler in (fileHandler, consoleHandler):
    handler.setFormatter(logFormatter)
    rootLogger.addHandler(handler)
# Start Execution -----------------------------------------

def to_text(data):
    """Return subprocess output as a native string on both Python 2 and 3.

    On Python 2 the pipe output is already ``str``; on Python 3 it is
    ``bytes`` and has to be decoded before the regex patterns can be applied.
    """
    if isinstance(data, str):
        return data
    return data.decode('utf-8', 'replace')


def format_timecode(seconds):
    """Format a position in seconds as zero-padded ``HH:MM:SS.ffff``.

    The minutes field is reduced modulo 60 so positions past one hour are
    rendered correctly, and the value is rounded first so the seconds field
    can never be printed as ``60.0000``.
    """
    total = round(float(seconds), 4)
    minutes, secs = divmod(total, 60)
    hours, minutes = divmod(int(minutes), 60)
    return '%02d:%02d:%07.4f' % (hours, minutes, secs)


def select_candidates(blacks, target, margin):
    """Return the black segments that start strictly within target +/- margin.

    Each segment is a ``(start, end, duration)`` tuple of numeric strings.
    """
    low, high = target - margin, target + margin
    return [black for black in blacks if low < float(black[0]) < high]


def prompt_for_split(candidates, read_line=None):
    """Ask the user which candidate black segment to split on.

    ``read_line`` is a zero-argument callable returning one line of input;
    it defaults to ``raw_input`` (Python 2) or ``input`` (Python 3).
    Returns the chosen index into ``candidates``, or ``None`` when the user
    picks the "skip" entry or input ends.
    """
    if read_line is None:
        try:
            read_line = raw_input  # Python 2
        except NameError:
            read_line = input  # Python 3
    skip_index = len(candidates)
    print('More than one black section has been identified.\nPlease select the section that you want to split on.')
    for index, candidate in enumerate(candidates):
        mins, secs = divmod(float(candidate[0]), 60)
        print('%d: %d:%05.2f' % (index, mins, secs))
    print('%d: Skip this video' % skip_index)
    sys.stdout.write('Please enter your choice: ')
    sys.stdout.flush()
    while True:
        try:
            answer = int(read_line())
        except ValueError:
            sys.stdout.write('Try again, please enter a number: ')
            sys.stdout.flush()
            continue
        except EOFError:
            # No more input available (e.g. non-interactive run): skip the file
            print('Skipping...')
            return None
        if 0 <= answer < skip_index:
            print('Continuing processing...')
            return answer
        if answer == skip_index:
            print('Skipping...')
            return None
        sys.stdout.write('Please enter the number of one of the choices above: ')
        sys.stdout.flush()


def main():
    """Scan each input video for a black segment near the episode boundary and split there.

    Reads the module-level settings (inputvid, numEps, fakeData, debug, ...).
    A file is skipped (the loop continues with the next one) when parsing
    fails, when no candidate is found, when the user chooses to skip, or
    when mkvmerge does not report completion.
    """
    for vid in inputvid:
        if not fakeData:
            logging.info('Splitting file: %s' % vid)
            logging.info('Scanning file for black segments with black ratio at least %s and at least %s second(s) in length...' % (black_threshold,black_length))
            session = Popen(['ffmpeg', '-i', vid, '-vf', 'blackdetect=d='+black_length+':pic_th='+black_threshold, '-an', '-f', 'null', '-'], stdin=PIPE, stdout=PIPE, stderr=PIPE)
            raw_output = session.communicate()
            if debug:
                print(raw_output)
            # Index 1 is the stderr stream; the patterns below are applied to it
            ffmpeg_text = to_text(raw_output[1])
            if debug:
                print('\n\n\n')
                print(ffmpeg_text)
        else:
            ffmpeg_text = test_ffmpeg

        blacks = re.findall(black_pattern, ffmpeg_text)
        durations = re.findall(duration_pattern, ffmpeg_text) or re.findall(duration_pattern2, ffmpeg_text)

        #Check for issues with ffmpeg
        if not blacks or not durations:
            logging.error('There was a problem parsing the video. Perhaps the black length value should be decreased?')
            logging.debug('Length blacks: %d Length duration: %d' % (len(blacks),len(durations)))
            continue

        #do some math
        hours, minutes, seconds = durations[0]
        totalSeconds = (float(hours) * 60 + float(minutes)) * 60 + float(seconds)
        lenEst = totalSeconds / numEps  #approx. episode length in seconds
        margin = lenEst / 3  #acceptable margin (1/3 of one episode)

        logging.debug(blacks)
        logging.debug(durations[0])
        logging.debug(totalSeconds)
        logging.debug('estimated episode length = %.4f' % lenEst)
        logging.debug('margin = %.4f - %.4f' % (lenEst - margin, lenEst + margin))

        #keep only the black regions that start within the margin around the estimate
        selected_blacks = select_candidates(blacks, lenEst, margin)

        if not selected_blacks:
            logging.info('No suitable black sections were found. Try changing the pixel ratio threshold and minimum black length (-t and -b)')
            continue

        if len(selected_blacks) == 2:
            if float(selected_blacks[1][0]) - float(selected_blacks[0][0]) < 10:
                logging.info('Two possible black sections identified but they are within 10 seconds. Using first as the episode break...')
                selected_blacks = selected_blacks[:1]

        # Ask whenever more than one candidate remains, so that two candidates
        # that are far apart are not silently resolved to the first one.
        if len(selected_blacks) > 1:
            choice = prompt_for_split(selected_blacks)
            if choice is None:
                continue
            selected_blacks = [selected_blacks[choice]]

        black = selected_blacks[0]
        #split at the start of the black segment
        split = float(black[0])
        timecode = format_timecode(split)

        logging.debug('Identified episode break: %s' % (black,))
        logging.debug('split time: %.4f' % split)
        logging.debug('timecodes:' + timecode)

        #now we want to execute the below on the video with the identified time code as the target of the split
        #mkvmerge -o "test.mkv" --split "timecodes:00:xx:yy.zzzzzzz" "tvshow.mkv"
        if not fakeData:
            split_arg = 'timecodes:' + timecode
            logging.debug('Splitting with...' + split_arg)
            session = Popen(['mkvmerge', '-o', 'converted.mkv', '--split', split_arg, vid], stdin=PIPE, stdout=PIPE, stderr=PIPE)
            # Index 0 is the stdout stream, which is searched for the progress marker
            mkvmerge_text = to_text(session.communicate()[0])
        else:
            mkvmerge_text = test_mkvmerge

        if debug:
            print(mkvmerge_text)

        #check if split was successful
        if re.search('Progress: 100%', mkvmerge_text) is None:
            logging.error('The split may have failed')
            continue

        logging.info('Great Success! Episodes split at %s.' % timecode)

        #now lets do a rename
        if numEps == 2 and tryRename:
            fileName = os.path.basename(vid)
            fileExt = os.path.splitext(vid)[1][1:]
            # Escape the extension so it is matched literally inside the regex
            name_info = re.findall(filename_pattern % re.escape(fileExt), fileName)
            if name_info:
                #the regex worked
                show_name, season, episode1, episode2, quality, epname1, epname2 = name_info[0]
                new_name_1 = filename_new_pattern.format(show_name=show_name,season=season,episode=episode1,quality=quality,epname=epname1,fileExt=fileExt)
                new_name_2 = filename_new_pattern.format(show_name=show_name,season=season,episode=episode2,quality=quality,epname=epname2,fileExt=fileExt)
                logging.info('File 1: ' + new_name_1)
                logging.info('File 2: ' + new_name_2)
                #Renaming files (new names carry no directory, so they stay in the working directory)
                os.rename('converted-001.mkv', new_name_1)
                os.rename('converted-002.mkv', new_name_2)
                logging.info('New episode files renamed!')
            else:
                #regex didn't work
                logging.warning('Could not parse input file name, using original name')
                name_ext = os.path.splitext(vid)
                os.rename('converted-001.mkv',name_ext[0]+'-Part1'+name_ext[1])
                os.rename('converted-002.mkv',name_ext[0]+'-Part2'+name_ext[1])


if __name__ == '__main__':
    main()
Hey there. Thanks for your work and for sharing it.
I would love to use this script as well,
but for series longer than 15 minutes, which 99% of the time are just 2-part episodes split at almost exactly 50% of each file.
So there is no need to search the whole file, only roughly from 48% to 52% of it.
Is there any chance you could enhance the script that way? I have been searching for a working script for years. :/
Thank you
Hello, glad to help! It's been a long time since I've worked on this but I would be glad to take a look at it for you. Is it not working at all? Seems like you're saying it still works, this would just make it quicker?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thank you for commenting, I'm glad to hear that this helped someone! I know it's a bit of a niche area but I figured someone would be able to use it some day.