-
-
Save CraftyCanine/3a1a86b2837092038edab2ea33182460 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python | |
#---------------------------------------------------------- | |
# | |
# Episode Split | |
# CraftyCanine | |
# | |
# Description | |
# | |
# This script is meant to make the process of splitting | |
# multi-episode files for 15 min TV shows into 1 episode | |
# per file. | |
# | |
# | |
# Requirements | |
# | |
# ffmpeg, mkvmerge | |
# | |
# | |
# Instructions | |
# | |
# Since this is a command-line tool, hopefully you know your way around the terminal. | |
# | |
# 1) Install ffmpeg, mkvmerge, and Python 2 (a version older than Python 3) from your favorite package manager (yum, dpkg, brew, etc.) if you don't already have them.
# 2) Download the file from gist by getting the raw link and copy pasting it into a file or run "wget <link>". | |
# 3) Change into the directory where the files you want to split are stored. | |
# 4) Run "python episodesplit.py -i <name of video>" to run with defaults. Depending on the quality of the video, it could take around 5 minutes. | |
# | |
# Note: If the tool doesn't want to split or is coming back with a bunch of choices, you might want to adjust the thresholds. | |
# There's "-b <length in seconds>" to adjust the minimum black section length to search for. There's also | |
# "-t <ratio of black pixels to non-black pixels>" for adjusting the sensitivity for what is considered a black frame. | |
# Run "python episodesplit.py -h" for more info on the arguments. | |
# | |
# -------------------------------------------------------- | |
import sys,os,math | |
import logging | |
from logging.handlers import RotatingFileHandler | |
import argparse | |
from subprocess import Popen, PIPE | |
import re | |
#ffmpeg -i tvshow.mkv -vf "blackdetect=d=1.5" -an -f null - | |
#mkvmerge -o "test.mkv" --split "timecodes:00:11:37.321999998" "tvshow.mkv" | |
# Configurables ------------------------------------------
# Regex patterns for ffmpeg output. Both duration patterns are checked.
# They are raw strings so that regex escapes such as \d and \[ reach the
# regex engine untouched instead of being treated as (invalid) string escapes.
# Groups of black_pattern: (black_start, black_end, black_duration) in seconds.
black_pattern = r'\[blackdetect @ .+?\] black_start:(\d+?\.\d+?) black_end:(\d+?\.\d+?) black_duration:(\d+?\.\d+?)\n'
# Groups of both duration patterns: (hours, minutes, seconds).
duration_pattern = r'DURATION\s+?: (\d+?):(\d+?):(\d+?\.\d+?)\n'
duration_pattern2 = r'Duration: (\d+?):(\d+?):(\d+?\.\d+?),'
# Regex pattern for the old (two-episode) filename; the trailing %s is filled
# in with the file extension before matching.
# Groups: (show name, season, first episode, second episode, quality,
#          first episode name, second episode name).
filename_pattern = r"(.+?)\.?(S\d\d)(E\d\d)-(E\d\d)\.(.+?)\.([\w,\.,',\,,\-]+?)\.?\+\.([\w,\.,',\,,\-]+?)\.%s"
# String format pattern for the new (single-episode) filename
filename_new_pattern = '{show_name}.{season}{episode}.{quality}.{epname}.{fileExt}'
# Test data for the fake data parameter (-f / --fakedata)
test_ffmpeg = ''''''  # insert new test ffmpeg output here
test_mkvmerge = ''''''  # insert new test mkvmerge output here
# Globals ------------------------------------------------
# Defaults; each of these is overwritten from the command-line arguments.
debug = False
tryRename = False
fakeData = False
black_length = 1.0
# Parse Arguments -----------------------------------------
# Build the command-line interface. Options are registered in the same order
# as before so the generated help text is unchanged.
parser = argparse.ArgumentParser(
    description='Script designed to ease the process of splitting multi-episode files for 15 min TV shows into separate files with 1 episode per file. NOTE: mkvmerge, the tool used to split the files, works using key frames. The split must occur on a key frame, so might not be directly at the episode mark.'
)
parser.add_argument(
    '-i', '--input',
    nargs='+',
    required=True,
    help='Input video file to be split.',
)
parser.add_argument(
    '-d', '--debug',
    action='store_true',
    help='Enable debug mode NOTE: wouldn\'t recommend running debug mode with more than one file...',
)
parser.add_argument(
    '-n',
    default=2,
    help='Number of episodes in file (not working yet, don\'t use)',
)
parser.add_argument(
    '-l', '--logpath',
    default=os.path.splitext(__file__)[0] + '.log',
    help='Path to log file',
)
# store_false: args.norename is True unless -r is given, i.e. it already
# holds the "should we rename?" answer.
parser.add_argument(
    '-r', '--norename',
    action='store_false',
    help='Do not try to rename resulting episode files using original file name (Script will try to rename by default)',
)
parser.add_argument(
    '-f', '--fakedata',
    action='store_true',
    help='Use fake data instead of running on a file (input file still required, but won\'t be used. NOTE: Test data configurable variables (top of source file) must be populated with test output of ffmpeg and mkvmerge.',
)
parser.add_argument(
    '-b', '--black',
    type=float,
    default=1.0,
    help='Length of time in seconds that we are looking for black segments for potential split points (default is 1.0)',
)
parser.add_argument(
    '-t', '--threshold',
    type=float,
    default=0.98,
    help='Threshold of black to non-black pixels (default is 0.98)',
)
args = parser.parse_args()

# Debug flag
if args.debug is not None:
    debug = args.debug

# Input video(s) (multi-episode files), number of episodes per file, log file
# name, whether to rename the resulting files, and whether to use fake data.
inputvid = args.input
numEps = float(args.n)
name = args.logpath
tryRename = args.norename
fakeData = args.fakedata

# blackdetect parameters, pre-formatted as strings for the ffmpeg filter
# argument: minimum black segment length and black pixel ratio threshold.
black_length = format(args.black, '.1f')
black_threshold = format(args.threshold, '.2f')
# Logging ---------------------------------------
# One shared format for both the rotating log file and the console.
fmt = '%(asctime)s [%(levelname)s] %(message)s'
logFormatter = logging.Formatter(fmt)

# Handlers: a size-rotated log file at the configured path, plus stdout.
fileHandler = RotatingFileHandler(name, maxBytes=5e+6, backupCount=10)
consoleHandler = logging.StreamHandler(sys.stdout)

# Configure the root logger: DEBUG when debug mode is on, INFO otherwise.
rootLogger = logging.getLogger()
rootLogger.setLevel(logging.DEBUG if debug else logging.INFO)

# Attach the formatter to each handler and register it (file first, then console).
for handler in (fileHandler, consoleHandler):
    handler.setFormatter(logFormatter)
    rootLogger.addHandler(handler)
# Start Execution -----------------------------------------
# For every input video: scan it with ffmpeg's blackdetect filter, pick the
# black segment closest to where the episode break is expected, split the file
# there with mkvmerge, and (optionally) rename the two resulting files.

# raw_input() only exists on Python 2; fall back to input() on Python 3.
try:
    _read_line = raw_input
except NameError:
    _read_line = input


def _to_text(data):
    """Return subprocess output as text (decodes byte strings on Python 3)."""
    if not isinstance(data, str):
        return data.decode('utf-8', 'replace')
    return data


def _format_timecode(seconds):
    """Format a number of seconds as a zero-padded HH:MM:SS.ssss timecode."""
    # Round first so the seconds field can never be printed as "60.0000".
    total = round(seconds, 4)
    hours = int(total // 3600)
    # Minutes must wrap at 60, otherwise splits past one hour are wrong.
    minutes = int(total // 60) % 60
    secs = total - hours * 3600 - minutes * 60
    return '%02d:%02d:%07.4f' % (hours, minutes, secs)


def _prompt(text):
    """Write a prompt to stdout without a trailing newline."""
    sys.stdout.write(text + ' ')
    sys.stdout.flush()


for vid in inputvid:
    # --- Step 1: get the ffmpeg blackdetect report (or the canned test data)
    if not fakeData:
        logging.info('Splitting file: %s' % vid)
        logging.info('Scanning file for black segments with black ratio at least %s and at least %s second(s) in length...' % (black_threshold, black_length))
        session = Popen(['ffmpeg', '-i', vid, '-vf', 'blackdetect=d=' + black_length + ':pic_th=' + black_threshold, '-an', '-f', 'null', '-'], stdin=PIPE, stdout=PIPE, stderr=PIPE)
        raw_output = session.communicate()
        if debug:
            print(raw_output)
        # The blackdetect and duration lines are parsed from stderr.
        res_text = _to_text(raw_output[1])
        if debug:
            print('\n\n\n')
            print(res_text)
    else:
        res_text = test_ffmpeg

    # --- Step 2: parse black segments and the total duration
    blacks = re.findall(black_pattern, res_text)
    duration = re.findall(duration_pattern, res_text)
    if not duration:
        duration = re.findall(duration_pattern2, res_text)

    # Check for issues with ffmpeg
    if not blacks or not duration:
        logging.error('There was a problem parsing the video. Perhaps the black length value should be decreased?')
        logging.debug('Length blacks: %d Length duration: %d' % (len(blacks), len(duration)))
        continue

    duration = duration[0]

    # --- Step 3: estimate where the episode break should be
    # total minutes = hours * 60 + minutes
    totalMins = float(duration[0]) * 60 + float(duration[1])
    seconds = float(duration[2])
    totalSeconds = totalMins * 60 + seconds
    # approximate episode length in seconds
    lenEst = totalSeconds / numEps
    # acceptable margin around the estimate (1/3 of one episode)
    margin = lenEst / 3

    logging.debug(blacks)
    logging.debug(duration)
    logging.debug(totalSeconds)
    logging.debug('estimated episode length = %.4f' % lenEst)
    logging.debug('margin = %.4f - %.4f' % (lenEst - margin, lenEst + margin))

    # Keep only the black segments that start within the acceptable margin.
    lower = lenEst - margin
    upper = lenEst + margin
    selected_blacks = [black for black in blacks if lower < float(black[0]) < upper]

    if not selected_blacks:
        logging.info('No suitable black sections were found. Try changing the pixel ratio threshold and minimum black length (-t and -b)')
        continue

    # Two candidates that start within 10 seconds of each other are treated
    # as the same break; the first one is used.
    if len(selected_blacks) == 2:
        secs1 = float(selected_blacks[0][0])
        secs2 = float(selected_blacks[1][0])
        if (secs2 - secs1) < 10:
            logging.info('Two possible black sections identified but they are within 10 seconds. Using first as the episode break...')
            selected_blacks = [selected_blacks[0]]

    # --- Step 4: if the choice is still ambiguous, ask the user
    # (">1", not ">2": two distinct candidates are ambiguous as well.)
    if len(selected_blacks) > 1:
        print('More than one black section has been identified.\nPlease select the section that you want to split on.')
        for i, candidate in enumerate(selected_blacks):
            begin = float(candidate[0])
            print('%d: %d:%05.2f' % (i, begin / 60, begin % 60))
        skip_choice = len(selected_blacks)
        print('%d: Skip this video' % skip_choice)
        _prompt('Please enter your choice:')
        while True:
            try:
                answer = int(_read_line())
            except ValueError:
                _prompt('Try again, please enter a number:')
                continue
            if 0 <= answer <= skip_choice:
                break
            _prompt('Please enter the number of one of the choices above:')
        if answer == skip_choice:
            print('Skipping...')
            continue
        print('Continuing processing...')
        selected_blacks = [selected_blacks[answer]]

    # --- Step 5: split at the start of the chosen black segment
    black = selected_blacks[0]
    split = float(black[0])
    timecode = _format_timecode(split)

    logging.debug('Identified episode break: %s' % (black,))
    logging.debug('split time: %.4f' % split)
    logging.debug('timecodes:' + timecode)

    # Equivalent command line:
    # mkvmerge -o "converted.mkv" --split "timecodes:HH:MM:SS.ssss" "tvshow.mkv"
    if not fakeData:
        split_arg = 'timecodes:' + timecode
        logging.debug('Splitting with...' + split_arg)
        session = Popen(['mkvmerge', '-o', 'converted.mkv', '--split', split_arg, vid], stdin=PIPE, stdout=PIPE, stderr=PIPE)
        res_text = _to_text(session.communicate()[0])
    else:
        res_text = test_mkvmerge

    if debug:
        print(res_text)

    # Check if the split was successful
    if 'Progress: 100%' not in res_text:
        logging.error('The split may have failed')
        continue

    logging.info('Great Success! Episodes split at %s.' % timecode)

    # --- Step 6: rename the two output files
    if numEps == 2 and tryRename:
        fileName = os.path.basename(vid)
        fileExt = os.path.splitext(vid)[1][1:]
        # Escape the extension so it is matched literally by the regex.
        name_info = re.findall(filename_pattern % re.escape(fileExt), fileName)

        if name_info:
            # The original name could be parsed: build one name per episode.
            show_name, season, episode1, episode2, quality, epname1, epname2 = name_info[0]
            new_name_1 = filename_new_pattern.format(show_name=show_name, season=season, episode=episode1, quality=quality, epname=epname1, fileExt=fileExt)
            new_name_2 = filename_new_pattern.format(show_name=show_name, season=season, episode=episode2, quality=quality, epname=epname2, fileExt=fileExt)
            logging.info('File 1: ' + new_name_1)
            logging.info('File 2: ' + new_name_2)
            os.rename('converted-001.mkv', new_name_1)
            os.rename('converted-002.mkv', new_name_2)
            logging.info('New episode files renamed!')
        else:
            # The original name could not be parsed: fall back to -Part1/-Part2.
            logging.warning('Could not parse input file name, using original name')
            name_ext = os.path.splitext(vid)
            os.rename('converted-001.mkv', name_ext[0] + '-Part1' + name_ext[1])
            os.rename('converted-002.mkv', name_ext[0] + '-Part2' + name_ext[1])
Thank you for commenting, I'm glad to hear that this helped someone! I know it's a bit of a niche area but I figured someone would be able to use it some day.
Hey there. Thanks for your work and for sharing.
I would love to use this script as well,
but for series longer than 15 minutes, which 99% of the time are only 2-part episodes cut in half at almost exactly 50% of each file.
So there is no need to search the whole file, only from about 48% to 52% of the file.
Is there any chance you could enhance the script that way? I've been searching for a working script for years :/
Thank you
Hello, glad to help! It's been a long time since I've worked on this but I would be glad to take a look at it for you. Is it not working at all? Seems like you're saying it still works, this would just make it quicker?
I just came across your script and just wanted to say THANK YOU! This saved me a ton of time. I really appreciate all the work you put into this :)