Skip to content

Instantly share code, notes, and snippets.

@twobob
Last active June 23, 2022 00:43
Show Gist options
  • Save twobob/94ae44e62476d4ee65759792913a3d4a to your computer and use it in GitHub Desktop.
Save twobob/94ae44e62476d4ee65759792913a3d4a to your computer and use it in GitHub Desktop.
Fast, rough estimate of mean words per minute (WPM) from a video's VTT subtitle file and its duration metadata
import youtube_dl
import time
import re
import requests
import os
def downSub(video_url, language):
    """Look up the WebVTT subtitle download link for a video.

    Metadata is fetched through youtube_dl without downloading the media.
    The ``subtitles`` map is searched first and ``automatic_captions`` is
    used as the fallback.

    Args:
        video_url: URL of the video. For YouTube links, anything from
            ``&list=`` onward is stripped before the lookup.
        language: Key of the wanted track in the subtitle maps, e.g. ``'en'``.

    Returns:
        ``[subtitle_url, video_name]``, where ``video_name`` is the video
        title with Windows-reserved filename characters removed, or ``None``
        when extraction fails, the video reports no formats, or no ``vtt``
        track exists for ``language``.
    """
    # Drop the playlist ID from YouTube links so only the single video is used.
    lowered = video_url.lower()
    if 'youtube.com' in lowered or 'youtu.be' in lowered:
        if '&list=' in video_url:
            video_url = video_url.split('&list=')[0].strip()

    ydl_opts = {
        'dump-json': True,
        'writesubtitles': True,
        'writeautomaticsub': True,
        'youtube_include_dash_manifest': False,
    }
    try:
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            info_dict = ydl.extract_info(video_url, download=False)
    except Exception:
        # Best-effort lookup: report and bail out, but (unlike a bare
        # ``except:``) let KeyboardInterrupt / SystemExit propagate.
        print('Error : Check your Internet Connection or Url.')
        return None

    # Checked outside the ``try`` so this status message is actually shown;
    # previously ``print(text=...)`` raised TypeError, which was swallowed and
    # reported as a connection error instead.
    if not info_dict or not info_dict.get('formats'):
        print(' Status : Something went wrong retry or video is unavailable')
        return None

    # replacing reserved characters for windows for filename.
    video_name = re.sub('[\\\\/*?:"<>|]', '', info_dict['title'])

    # Search the uploaded subtitles first, then the automatic captions.
    for tracks in (info_dict.get('subtitles'),
                   info_dict.get('automatic_captions')):
        for fmt in (tracks or {}).get(language) or []:
            if fmt.get('ext') == 'vtt':
                return [fmt['url'], video_name]
    return None
def main():
    """Prompt for a video URL and print a rough words-per-minute estimate.

    Steps: read the URL from stdin (an empty answer selects a built-in
    default video), fetch the video duration via youtube_dl, download the
    English ``vtt`` track found by ``downSub`` into the ``subtitles``
    directory, count the words on one line of that file, and print the word
    count, the words-per-minute figure and the elapsed wall-clock time.

    Returns early with a message when no subtitle track is found or the
    downloaded file is too short to contain the expected line.
    """
    ydl_opts = {
        'writesubtitles': True,
        'writeautomaticsub': True,
        'subtitlesformat': 'vtt',
        'subtitleslangs': ['en'],
    }

    Url = input('Please input a video URL: ')
    # The timer starts after the prompt so typing time is not measured.
    start = time.time()
    if len(Url) == 0:
        Url = "https://www.youtube.com/watch?v=8HZbaYc0qhc"
    else:
        print('using', Url)

    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(Url, download=False)
    duration = info_dict.get('duration')
    print('The duration of the video is: ' + str(duration) + ' seconds')

    subtitle = downSub(Url, 'en')
    if not subtitle:
        # downSub returns None on failure; there is nothing to count.
        print('No subtitle track available; cannot estimate wpm.')
        return
    sub_url, video_name = subtitle

    # Create the target directory on first run instead of failing in open().
    os.makedirs('subtitles', exist_ok=True)
    vtt_path = os.path.join('subtitles', f"{video_name}.vtt")

    r = requests.get(sub_url)
    with open(vtt_path, 'wb') as f:
        f.write(r.content)
    print('\n Subtitle Downloaded Successfully.')

    # Read the saved file back; the context manager closes the handle.
    with open(vtt_path, encoding='utf-8') as f:
        content = f.readlines()

    # NOTE(review): only line index 6 of the file is counted, exactly as
    # before. This presumably relies on the cue text sitting on that line of
    # the downloaded track - confirm against a real download.
    if len(content) <= 6:
        print('Subtitle file is too short; cannot estimate wpm.')
        return
    subs = content[6]

    # Strip "HH:MM:SS.mmm --> HH:MM:SS.mmm" cue timing ranges from the line.
    new = re.sub(
        r'\d{2}\W\d{2}\W\d{2}\W\d{3}\s\W{3}\s\d{2}\W\d{2}\W\d{2}\W\d{3}',
        '',
        subs,
    )
    print(new)

    # split() with no argument ignores runs of whitespace and the trailing
    # newline, so empty strings are not counted as words.
    totalcount = len(new.split())
    print(totalcount, 'total')

    if duration:
        wpm = totalcount / (duration / 60)
        print(wpm, 'wpm')
    else:
        # A missing or zero duration would otherwise crash the division.
        print('Duration unavailable; cannot compute wpm.')

    elapsed = time.time() - start
    print(elapsed, 'seconds')
# Run the interactive estimate only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment