Created
November 22, 2017 22:28
-
-
Save tutysara/27b2a06e461e3ebab1b360dadaaa5872 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Reduce WebVTT subtitle files to their plain caption text.

USAGE:
    $ VTT Formatter.py [DIR]
    $ VTT Formatter.py ./subs

For every ``*.vtt`` file found directly inside DIR, a ``.txt`` file with the
same base name is written to an output directory whose path is DIR with
"txt" appended (``./subs`` -> ``./substxt``).  The text of all files is also
concatenated into ``allcombined.txt`` in that directory, for use as a dataset.
"""
__author__ = 'AxelAli'
#MADE BY AXEL ALI
#https://github.com/AxelAli
import os
import re
import sys

# Inline cue markup such as <c>, </c> or <00:00:01.000>; compiled once
# because it is applied to every caption line.
TAG_PATTERN = re.compile(r'<[^>]*>')
VTT_SUFFIX = ".vtt"
USAGE = "USAGE: VTT Formatter.py [DIR]\n"


def extract_text(lines):
    """Yield the caption text lines from an iterable of WebVTT lines.

    Everything before the first timing line (a line containing '-->') is
    treated as header metadata and skipped; timing lines themselves are
    skipped too.  Markup tags are removed from the remaining lines, and
    lines left with no visible text are dropped.

    NOTE: any other non-timing line after the first cue (for example a cue
    identifier) is passed through unchanged.
    """
    contents_reached = False
    for line in lines:
        if '-->' in line:
            contents_reached = True
            continue
        if not contents_reached:
            continue
        line = TAG_PATTERN.sub('', line)
        # strip() rather than a length test: this also rejects
        # whitespace-only and "\r\n" lines, and keeps a one-character
        # final line that has no trailing newline.
        if line.strip():
            yield line


def format_directory(source_dir):
    """Convert every .vtt file in *source_dir* and return the output path.

    The output directory (``source_dir + "txt"``) is created when missing.
    Files are processed in sorted name order so that ``allcombined.txt`` is
    reproducible; os.listdir() alone returns names in arbitrary order.
    """
    print("Searching inside : " + source_dir)
    newdirectory = source_dir + "txt"
    print("newdirectory " + newdirectory)
    if not os.path.exists(newdirectory):
        os.makedirs(newdirectory)  # Creates the output directory

    # Context managers guarantee every handle is flushed and closed, even
    # when a later file fails to open or read.
    with open(os.path.join(newdirectory, "allcombined.txt"), 'w') as combined:
        for name in sorted(os.listdir(source_dir)):
            if not name.endswith(VTT_SUFFIX):
                continue
            print("Formating : " + name)  # Shows current file
            # Swap only the trailing extension; str.replace would also
            # rewrite a ".vtt" that occurs earlier in the file name.
            txt_name = name[:-len(VTT_SUFFIX)] + ".txt"
            with open(os.path.join(source_dir, name)) as source:
                with open(os.path.join(newdirectory, txt_name), 'w') as newfile:
                    for line in extract_text(source):
                        newfile.write(line)   # per-file output
                        combined.write(line)  # combined dataset
            print("DONE!")  # NEXT ONE!
    return newdirectory


def main(argv=None):
    """Command-line entry point; returns the process exit status.

    *argv* defaults to ``sys.argv``.  Returns 2 (after writing a message to
    stderr) when the directory argument is missing or is not a directory,
    and 0 once all files have been converted.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        sys.stderr.write(USAGE)
        return 2
    source_dir = argv[1]
    # Validate before format_directory() creates the output directory.
    if not os.path.isdir(source_dir):
        sys.stderr.write("Not a directory: %s\n" % source_dir)
        sys.stderr.write(USAGE)
        return 2
    format_directory(source_dir)
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment