Skip to content

Instantly share code, notes, and snippets.

@twobob
Created June 22, 2022 23:36
Show Gist options
  • Save twobob/48e72cfe0930e5daa829edca15e57ed0 to your computer and use it in GitHub Desktop.
Save twobob/48e72cfe0930e5daa829edca15e57ed0 to your computer and use it in GitHub Desktop.
Fast and rough words-per-minute (WPM) approximation using the video duration and VTT subtitles converted to SRT with FFmpeg
import youtube_dl
import time
import re
import requests
import os
def _first_vtt_url(tracks, language):
    """Return the URL of the first VTT-format track for *language*, or None.

    *tracks* is a mapping of language code to a list of format dicts
    (each read here via its ``'ext'`` and ``'url'`` keys); it may be
    ``None`` or empty.
    """
    if not tracks:
        return None
    for fmt in tracks.get(language) or []:
        if fmt.get('ext') == 'vtt' and fmt.get('url'):
            return fmt['url']
    return None


def downSub(video_url, language):
    """Look up the WebVTT subtitle download link for a video.

    Manually provided subtitles are preferred; automatic captions are
    used as a fallback. Nothing is downloaded here: only the video
    metadata is queried through ``youtube_dl``.

    Args:
        video_url: URL of the video. For YouTube links a trailing
            ``&list=...`` playlist ID is stripped first.
        language: Subtitle language code to look up (e.g. ``'en'``).

    Returns:
        ``[subtitle_url, video_name]`` where ``video_name`` is the video
        title with Windows-reserved filename characters removed, or
        ``None`` when the metadata could not be fetched, the video has no
        formats, or no VTT track exists for *language*.
    """
    # Check for a YouTube link and drop the playlist ID from the URL if present,
    # so only the single video is resolved.
    lowered = video_url.lower()
    if ('youtube.com' in lowered or 'youtu.be' in lowered) and '&list=' in video_url:
        video_url = video_url.split('&list=')[0].strip()

    ydl_opts = {
        'dump-json': True,
        'writesubtitles': True,
        'writeautomaticsub': True,
        'youtube_include_dash_manifest': False,
    }
    try:
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            info_dict = ydl.extract_info(video_url, download=False)
    except Exception:
        # Best-effort boundary: report and give up, but (unlike a bare
        # ``except:``) let KeyboardInterrupt / SystemExit propagate.
        print('Error : Check your Internet Connection or Url.')
        return None

    if not info_dict or not info_dict.get('formats'):
        # The original passed this message as ``print(text=...)``, which raises
        # TypeError and was then misreported as a connection error.
        print(' Status : Something went wrong retry or video is unavailable')
        return None

    # Remove characters that are reserved in Windows filenames.
    video_name = re.sub(r'[\\/*?:"<>|]', '', info_dict['title'])

    # Prefer uploader-provided subtitles, then fall back to automatic captions.
    for tracks in (info_dict.get('subtitles'), info_dict.get('automatic_captions')):
        sub_dlink = _first_vtt_url(tracks, language)
        if sub_dlink:
            return [sub_dlink, video_name]
    return None
def _srt_words(lines):
    """Return the list of words found in the cue text of SRT *lines*.

    Blank separator lines, numeric cue-index lines and ``start --> end``
    timing lines are skipped; everything else is split on whitespace.
    This is deliberately rough: a cue whose text is only digits is
    mistaken for an index line and ignored.
    """
    words = []
    for raw in lines:
        line = raw.strip()
        if not line or line.isdigit() or '-->' in line:
            continue
        words.extend(line.split())
    return words


def main():
    """Estimate the spoken words-per-minute of a video from its subtitles.

    Prompts for a video URL (falling back to a built-in sample URL when
    the input is empty), prints the video duration, downloads the English
    VTT subtitles into ``subtitles/``, converts them to ``subs.srt`` with
    ffmpeg, and prints the word count, each numbered word, the resulting
    words-per-minute and the elapsed wall-clock time.

    Raises:
        requests.HTTPError: If the subtitle download returns an error status.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        OSError: If ffmpeg cannot be started or a file cannot be written/read.
    """
    # Local import: only this entry point spawns ffmpeg.
    import subprocess

    ydl_opts = {
        'writesubtitles': True,
        'writeautomaticsub': True,
        'subtitlesformat': 'srt',
        'subtitleslangs': ['en'],
    }

    url = input('Please input a video URL: ').strip()
    start = time.time()  # timing starts after the prompt so typing is not measured
    if not url:
        url = "https://www.youtube.com/watch?v=8HZbaYc0qhc"

    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(url, download=False)
    duration = info_dict.get('duration')
    print('The duration of the video is: ' + str(duration) + ' seconds')

    subtitle = downSub(url, 'en')
    if subtitle:  # None when no subtitle link could be found
        sub_url, video_name = subtitle

        response = requests.get(sub_url, timeout=30)
        response.raise_for_status()

        # Create the target directory and remember the exact path written, so
        # ffmpeg is later pointed at the same file.
        os.makedirs('subtitles', exist_ok=True)
        vtt_path = os.path.join('subtitles', f"{video_name}.vtt")
        with open(vtt_path, 'wb') as f:
            f.write(response.content)
        print('\n Subtitle Downloaded Successfully.')

        # Argument list instead of a shell string: the video title is untrusted
        # and may contain spaces or shell metacharacters. ``-y`` overwrites a
        # stale subs.srt instead of blocking on ffmpeg's interactive prompt.
        subprocess.run(['ffmpeg', '-y', '-i', vtt_path, 'subs.srt'], check=True)

        with open('subs.srt', encoding='utf-8') as srt_file:
            words = _srt_words(srt_file)

        # NOTE(review): auto-generated captions may repeat lines across cues,
        # in which case this count is an over-estimate -- confirm on real data.
        totalcount = len(words)
        print(totalcount, 'total')
        for i, word in enumerate(words, start=1):
            print('[' + str(i) + '|' + word + '] ', end='')
        print()

        if duration:
            wpm = totalcount / (duration / 60)
            print(wpm, 'wpm')
        else:
            # Missing or zero duration would otherwise raise on the division.
            print('Cannot compute wpm: video duration is unavailable.')

    elapsed = time.time() - start
    print(elapsed, 'seconds')
# Run the interactive WPM estimate only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment