Created
October 8, 2020 21:11
-
-
Save ClearlyKyle/4fbc38a2bf9ce4a893e09dafd00bf849 to your computer and use it in GitHub Desktop.
Using pysrt to convert an SRT file to a list of sentences.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pysrt | |
| import os | |
# Folder (relative to the working directory) that receives the .txt files.
output_folder_name = "txt"


def collect_sentences(cue_texts):
    """Group subtitle cue texts into sentence lines.

    Cue texts are concatenated (newlines turned into spaces, one space
    appended after every cue) until the accumulated text contains a
    period; the accumulated text is then emitted as one entry and the
    accumulator is reset.

    Args:
        cue_texts: iterable of cue text values, in subtitle order.

    Returns:
        A list of strings, one per flushed chunk. Each entry keeps the
        trailing space added after its last cue.
    """
    sentences = []
    pending = ""
    for text in cue_texts:
        # + ' ' because some sentences span over multiple time stamps
        pending += str(text).replace('\n', ' ') + ' '
        # Flush at most once per cue. Scanning character by character and
        # flushing on every period would emit an empty entry for each
        # additional period (e.g. "Wait... what.").
        if "." in pending:
            sentences.append(pending)
            pending = ""
    # Keep trailing text that never reached a period instead of dropping it.
    if pending.strip():
        sentences.append(pending)
    return sentences


def convert_file(srt_name, output_dir=output_folder_name):
    """Convert one .srt file into a text file with one sentence per line.

    Args:
        srt_name: path of the subtitle file to read with pysrt.
        output_dir: existing directory in which the .txt file is written.
    """
    print("Converting: ", srt_name)
    # splitext removes only the final extension, so "movie.en.srt" becomes
    # "movie.en.txt" rather than colliding with other "movie.*" files.
    output_name = os.path.splitext(srt_name)[0] + ".txt"
    subs = pysrt.open(srt_name)
    sentences = collect_sentences(sub.text for sub in subs)
    output_path = os.path.join(output_dir, output_name)
    with open(output_path, encoding='utf-8', mode='w') as output_file:
        output_file.writelines("%s\n" % sentence for sentence in sentences)


def main():
    """Convert every .srt file in the current directory."""
    # exist_ok ignores only the "already exists" case; other failures
    # (e.g. permissions) surface here instead of later at open().
    os.makedirs(output_folder_name, exist_ok=True)
    for entry in os.listdir("."):
        if entry.endswith(".srt"):
            convert_file(entry)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment