Last active
February 23, 2019 23:51
-
-
Save vinkrish/6de77fcc1e9bb1ded1281172e4164e43 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Creates readable text file from SRT file. | |
""" | |
import re, sys | |
import os | |
def clean_up(lines):
    """
    Strip the SRT bookkeeping lines and keep only the subtitle text.

    An SRT cue consists of a sequence number, a timestamp line, one or more
    text lines and a blank separator.  Sequence numbers are recognised by
    matching a running counter that starts at 1; the line directly after a
    sequence number is assumed to be the timestamp and is dropped as well.
    Blank (empty or whitespace-only) lines are dropped.  Text lines are
    returned unchanged, line endings included, and are not merged.

    lines: iterable of lines, e.g. the result of file.readlines().
    Returns a new list holding the text lines in their original order.
    """
    expected_index = 1
    text_lines = []
    skip_timestamp = False
    for position, line in enumerate(lines):
        # A byte-order mark glued to the first line would stop the very
        # first sequence number from matching, so the counter would never
        # advance and nothing would be filtered.
        if position == 0 and line.startswith('\ufeff'):
            line = line[1:]
        # Compare on the stripped form so '\r\n' endings and stray spaces
        # do not defeat the sequence-number and blank-line checks.
        stripped = line.strip()
        if stripped == str(expected_index):
            expected_index += 1
            skip_timestamp = True
        elif skip_timestamp:
            # The line after a sequence number is the timestamp: drop it.
            skip_timestamp = False
        elif stripped:
            text_lines.append(line)
    return text_lines
def main(args):
    """
    Walk a folder of subtitle files and write cleaned-up copies of them.

    For every directory found under the source folder, a directory with the
    same base name is created under the destination folder.  Each file is
    read, passed through clean_up() and written there under its original
    file name.

    args: command-line argument list in sys.argv form.  args[1], when
          present, overrides the source folder and args[2] the destination
          folder.  Without extra arguments the original hard-coded
          locations are used.
    """
    source_root = '/Users/vinkrish/Documents/Intro to Statistics Subtitles'
    target_root = '/Users/vinkrish/Documents/extracted'
    if args and len(args) > 1:
        source_root = args[1]
    if args and len(args) > 2:
        target_root = args[2]

    for dirpath, _dirnames, filenames in os.walk(source_root):
        # Only the last path component is kept, so nested folders end up
        # directly under the destination root.
        tail_dir = os.path.basename(os.path.normpath(dirpath))
        new_dir = os.path.join(target_root, tail_dir)
        if not os.path.isdir(new_dir):
            try:
                # makedirs also creates the destination root when missing.
                os.makedirs(new_dir)
            except OSError:
                print ("Creation of the directory failed")
                # Nothing can be written for this folder; move on instead
                # of failing later when the output file is opened.
                continue
        for file_name in filenames:
            # Finder metadata is not a subtitle file (the '.txt' variant
            # shows up after a bulk rename of the folder contents).
            if file_name in ('.DS_Store', '.DS_Store.txt'):
                continue
            print(file_name)
            with open(os.path.join(dirpath, file_name)) as f:
                lines = f.readlines()
            print(len(lines))
            new_lines = clean_up(lines)
            with open(os.path.join(new_dir, file_name), 'w') as new_file:
                new_file.writelines(new_lines)
# Script entry point: hand the raw command-line argument list to main().
if __name__ == "__main__":
    main(sys.argv)
""" | |
NOTES | |
* Run from command line as | |
** python subtitle-extract.py | |
* Writes the extracted text of each subtitle file to a file of the same name inside the extracted folder
""" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment