Created
June 22, 2022 23:36
-
-
Save twobob/48e72cfe0930e5daa829edca15e57ed0 to your computer and use it in GitHub Desktop.
Fast and Rough WPM approximation using video duration and vtt - FFMPEG - srt files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import re
import subprocess
import time

import requests
import youtube_dl
def _find_vtt_url(tracks_by_language, language):
    """Return the URL of the first ``vtt`` track for ``language``, else ``None``.

    ``tracks_by_language`` maps a language code to a list of track dicts that
    carry ``'ext'`` and ``'url'`` keys; it may be ``None`` or empty.
    """
    if not tracks_by_language:
        return None
    for track in tracks_by_language.get(language) or ():
        if track.get('ext') == 'vtt':
            return track.get('url')
    return None


def downSub(video_url, language):
    """Look up the WebVTT subtitle download link for a video.

    Entries under ``'subtitles'`` in the extracted info are preferred;
    ``'automatic_captions'`` is used as a fallback.

    Args:
        video_url: URL of the video. For YouTube links, anything from
            ``&list=`` onwards (the playlist ID) is dropped before lookup.
        language: Language code used as the key into the subtitle
            dictionaries (e.g. ``'en'``).

    Returns:
        ``[download_url, safe_video_name]`` when a ``vtt`` track exists for
        ``language``; ``None`` when extraction fails, the video reports no
        formats, or no matching track is found.
    """
    # Strip the playlist ID so only the single video is inspected.
    lowered = video_url.lower()
    is_youtube = 'youtube.com' in lowered or 'youtu.be' in lowered
    if is_youtube and '&list=' in video_url:
        video_url = video_url.split('&list=')[0].strip()

    # NOTE(review): 'dump-json' looks like the command-line flag spelling and
    # is presumably ignored by the Python API - confirm before relying on it.
    ydl_opts = {
        'dump-json': True,
        'writesubtitles': True,
        'writeautomaticsub': True,
        'youtube_include_dash_manifest': False,
    }

    try:
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            info_dict = ydl.extract_info(video_url, download=False)
    except Exception:
        # Deliberately best-effort: any extraction failure (network, bad URL,
        # unsupported site) is reported to the user and signalled with None.
        # Unlike a bare ``except``, Ctrl-C and SystemExit still propagate.
        print('Error : Check your Internet Connection or Url.')
        return None

    if not info_dict or not info_dict.get('formats'):
        print(' Status : Something went wrong retry or video is unavailable')
        return None

    # Remove characters that are reserved in Windows file names.
    video_name = re.sub(r'[\\/*?:"<>|]', '', info_dict['title'])

    # Uploaded subtitles first, automatic captions second.
    for tracks in (info_dict.get('subtitles'),
                   info_dict.get('automatic_captions')):
        sub_dlink = _find_vtt_url(tracks, language)
        if sub_dlink:
            return [sub_dlink, video_name]
    return None
def _srt_words(lines):
    """Collect the subtitle words from the lines of an SRT file.

    Blank lines, purely numeric lines (cue counters) and timestamp lines
    (those containing ``-->``) are skipped. A text line identical to the
    previously kept text line is skipped as well.
    """
    words = []
    previous_text = None
    for raw_line in lines:
        text = raw_line.strip()
        if not text or text.isdigit() or '-->' in text:
            continue
        # NOTE(review): auto-generated captions appear to repeat the previous
        # line in the following cue; consecutive duplicates are dropped so
        # they are not counted twice - confirm against real ffmpeg output.
        if text == previous_text:
            continue
        previous_text = text
        words.extend(text.split())
    return words


def main():
    """Prompt for a video URL and print a rough words-per-minute estimate.

    Steps: read the video duration via youtube_dl, download the English
    ``vtt`` subtitles into ``subtitles/``, convert them to ``subs.srt`` with
    ffmpeg, count the words in the SRT text and divide by the duration in
    minutes. Each failure is reported with a message and ends the run early.
    """
    ydl_opts = {
        'writesubtitles': True,
        'writeautomaticsub': True,
        'subtitlesformat': 'srt',
        'subtitleslangs': ['en'],
    }

    url = input('Please input a video URL: ').strip()
    start = time.time()
    if not url:
        # Fall back to a sample video when nothing was entered.
        url = "https://www.youtube.com/watch?v=8HZbaYc0qhc"

    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(url, download=False)
    duration = info_dict.get('duration')
    print('The duration of the video is: ' + str(duration) + ' seconds')
    if not duration:
        # A missing or zero duration would break the division below.
        print('Error : video duration is unavailable, cannot compute wpm.')
        return

    subtitle = downSub(url, 'en')
    if not subtitle:
        print('Error : no vtt subtitles found for this video.')
        return
    sub_dlink, video_name = subtitle

    # Save the subtitle file; the same path is handed to ffmpeg below.
    os.makedirs('subtitles', exist_ok=True)
    vtt_path = os.path.join('subtitles', f"{video_name}.vtt")
    response = requests.get(sub_dlink, timeout=30)
    response.raise_for_status()
    with open(vtt_path, 'wb') as vtt_file:
        vtt_file.write(response.content)
    print('\n Subtitle Downloaded Successfully.')

    # Argument list instead of a shell string: the video title is untrusted
    # and may contain spaces or shell metacharacters. -y overwrites a
    # subs.srt left behind by an earlier run instead of prompting.
    try:
        result = subprocess.run(['ffmpeg', '-y', '-i', vtt_path, 'subs.srt'])
    except FileNotFoundError:
        print('Error : ffmpeg executable not found.')
        return
    if result.returncode != 0:
        print('Error : ffmpeg could not convert the subtitles.')
        return

    with open('subs.srt', encoding='utf-8', errors='replace') as srt_file:
        words = _srt_words(srt_file)

    totalcount = len(words)
    print(totalcount, 'total')
    for index, word in enumerate(words, start=1):
        print(f'[{index}|{word}] ', end='')
    print()

    wpm = totalcount / (duration / 60)
    print(wpm, 'wpm')
    elapsed = time.time() - start
    print(elapsed, 'seconds')
# Run the interactive tool only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment