twobob · June 26, 2022 23:53 · twobob · Jun 26, 2022
diff --git a/wpm.py b/wpm.py
 import youtube_dl
 import time
 import re
 import requests
 import os
 import sys

 # Command-line arguments with the script name (sys.argv[0]) dropped;
 # main() takes the video URL from here when exactly one argument is given.
 argv = sys.argv[1:]
 # Not referenced anywhere else in the visible code.
 Url = ""


 def down_sub(video_url, language):
    """Look up the WebVTT subtitle download URL for a video.

    Manually provided subtitles are preferred; automatic captions are only
    consulted when no manual VTT track exists for ``language``.

    Args:
        video_url: URL of the video page. For YouTube links everything from
            ``&list=`` onwards is dropped before extraction.
        language: Key looked up in the extractor's ``subtitles`` and
            ``automatic_captions`` dictionaries.

    Returns:
        ``[subtitle_url, video_name]``, where ``video_name`` is the video
        title with characters reserved in Windows filenames removed, or
        ``None`` when extraction fails, the video reports no formats, or no
        VTT track exists for ``language``.
    """
    # Only YouTube links get the playlist parameter stripped; any other URL
    # is handed to the extractor untouched.
    lowered = video_url.lower()
    if ("youtube.com" in lowered or "youtu.be" in lowered) and '&list=' in video_url:
        video_url = video_url.split('&list=')[0].strip()

    ydl_opts = {'dump-json': True,
                'writesubtitles': True,
                'writeautomaticsub': True,
                'quiet': True,
                "no_warnings": True,
                "logtostderr": True,
                'youtube_include_dash_manifest': False}
    try:
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            info_dict = ydl.extract_info(video_url, download=False)
    except Exception:
        # Deliberately best-effort: any extraction failure is reported and
        # turned into None. Unlike a bare ``except:``, this still lets
        # KeyboardInterrupt and SystemExit propagate.
        print('Error : Check your Internet Connection or Url.')
        return None

    if not info_dict or not info_dict.get('formats'):
        print("Status : Something went wrong retry or video is unavailable")
        return None

    # Remove characters that are reserved in Windows filenames.
    video_name = re.sub(r'[\\/*?:"<>|]', "", info_dict['title'])

    # Manual subtitles first, automatic captions as the fallback.
    track_sources = (info_dict.get('subtitles'),
                     info_dict.get('automatic_captions'))
    for tracks_by_lang in track_sources:
        if not tracks_by_lang:
            continue
        for fmt in tracks_by_lang.get(language) or ():
            if fmt.get('ext') == 'vtt':
                return [fmt['url'], video_name]
    return None

 def main():
    """Estimate the words per minute of a video from its English subtitles.

    The video URL is the single command-line argument; with zero or several
    arguments the user is prompted for it instead. The video duration is
    read via youtube_dl, the English VTT track located with ``down_sub`` is
    downloaded, the words are counted and the rate is printed.

    Tracks containing ``align:start`` are saved to ``subtitles/<title>.vtt``
    and only the seventh line of the file is counted; all other tracks
    ("pure subs") are counted in full and not written to disk.

    Always terminates the process: status 2 after printing the result (kept
    from the original script), status 1 when the duration or the subtitles
    cannot be obtained.
    """
    # Exactly one argument is taken as the URL; anything else prompts.
    if len(argv) == 1:
        url = argv[0]
    else:
        url = input('Please input a video URL: ')

    ydl_opts = {
        'writesubtitles': True,
        'writeautomaticsub': True,
        'subtitlesformat': 'vtt',
        'subtitleslangs': ['en'],
        'noplaylist': True
    }
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(url, download=False)
    duration = (info_dict or {}).get('duration')
    if not duration:
        # A missing or zero duration would otherwise fail in the division.
        print('Error : Video duration is unavailable, cannot compute words per minute.')
        sys.exit(1)

    subtitle = down_sub(url, 'en')
    if not subtitle:
        # down_sub returns None when there is no usable track.
        print('Error : No English VTT subtitles found for this video.')
        sys.exit(1)
    sub_url, video_name = subtitle

    try:
        response = requests.get(sub_url, timeout=30)
        response.raise_for_status()
    except requests.RequestException as err:
        print(f'Error : Could not download subtitles ({err}).')
        sys.exit(1)

    raw_subs = response.content
    sub_text = raw_subs.decode("utf-8")
    # Matches a cue timing such as "00:00:01.000 --> 00:00:03.000".
    timestamp_re = r'\d{2}\W\d{2}\W\d{2}\W\d{3}\s\W{3}\s\d{2}\W\d{2}\W\d{2}\W\d{3}'

    if "align:start" in sub_text:
        # NOTE(review): presumably these are auto-generated captions - confirm.
        os.makedirs('subtitles', exist_ok=True)
        with open(os.path.join('subtitles', f"{video_name}.vtt"), 'wb') as f:
            f.write(raw_subs)
        print('\n Subtitle Downloaded Successfully.')

        # NOTE(review): only the seventh line of the track is counted, as in
        # the original script - confirm this heuristic is intended.
        lines = sub_text.split('\n')
        caption_line = lines[6] if len(lines) > 6 else ''
        totalcount = len(re.sub(timestamp_re, '', caption_line).split())
    else:
        print('Pure Subs')
        print('\n Subtitle Downloaded Successfully.')
        without_timings = re.sub(timestamp_re, '', sub_text)
        flat = ' '.join(without_timings.split())
        flat = flat.replace('WEBVTT Kind: captions Language: en', '')
        print(flat)
        # split() without an argument ignores the blank left by the header.
        totalcount = len(flat.split())

    convert = time.strftime("%H:%M:%S", time.gmtime(duration))
    print(totalcount, 'total words in HMS:', convert)
    wpm = int(totalcount / (duration / 60))
    print(wpm, 'ish wpm')
    # NOTE(review): exit status 2 on success is kept for backward
    # compatibility; 0 would be the conventional value.
    sys.exit(2)

 # Run the command-line tool only when executed as a script, not on import.
 if __name__ == '__main__':
    main()
	import youtube_dl
	import time
	import re
	import requests
	import os
	import sys

	# Command-line arguments with the script name (sys.argv[0]) dropped;
	# main() takes the video URL from here when exactly one argument is given.
	argv = sys.argv[1:]
	# Not referenced anywhere else in the visible code.
	Url = ""


	def down_sub(video_url, language):
	    """Look up the WebVTT subtitle download URL for a video.

	    Manually provided subtitles are preferred; automatic captions are only
	    consulted when no manual VTT track exists for ``language``.

	    Args:
	        video_url: URL of the video page. For YouTube links everything
	            from ``&list=`` onwards is dropped before extraction.
	        language: Key looked up in the extractor's ``subtitles`` and
	            ``automatic_captions`` dictionaries.

	    Returns:
	        ``[subtitle_url, video_name]``, where ``video_name`` is the video
	        title with characters reserved in Windows filenames removed, or
	        ``None`` when extraction fails, the video reports no formats, or
	        no VTT track exists for ``language``.
	    """
	    # Only YouTube links get the playlist parameter stripped; any other
	    # URL is handed to the extractor untouched.
	    lowered = video_url.lower()
	    if ("youtube.com" in lowered or "youtu.be" in lowered) and '&list=' in video_url:
	        video_url = video_url.split('&list=')[0].strip()

	    ydl_opts = {'dump-json': True,
	                'writesubtitles': True,
	                'writeautomaticsub': True,
	                'quiet': True,
	                "no_warnings": True,
	                "logtostderr": True,
	                'youtube_include_dash_manifest': False}
	    try:
	        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
	            info_dict = ydl.extract_info(video_url, download=False)
	    except Exception:
	        # Deliberately best-effort: any extraction failure is reported and
	        # turned into None. Unlike a bare ``except:``, this still lets
	        # KeyboardInterrupt and SystemExit propagate.
	        print('Error : Check your Internet Connection or Url.')
	        return None

	    if not info_dict or not info_dict.get('formats'):
	        print("Status : Something went wrong retry or video is unavailable")
	        return None

	    # Remove characters that are reserved in Windows filenames. A raw
	    # string avoids the invalid "\|" escape of the previous literal.
	    video_name = re.sub(r'[\\/*?:"<>|]', "", info_dict['title'])

	    # Manual subtitles first, automatic captions as the fallback.
	    track_sources = (info_dict.get('subtitles'),
	                     info_dict.get('automatic_captions'))
	    for tracks_by_lang in track_sources:
	        if not tracks_by_lang:
	            continue
	        for fmt in tracks_by_lang.get(language) or ():
	            if fmt.get('ext') == 'vtt':
	                return [fmt['url'], video_name]
	    return None

	def main():
	    """Estimate the words per minute of a video from its English subtitles.

	    The video URL is the single command-line argument; with zero or
	    several arguments the user is prompted for it instead. The video
	    duration is read via youtube_dl, the English VTT track located with
	    ``down_sub`` is downloaded, the words are counted and the rate is
	    printed.

	    Tracks containing ``align:start`` are saved to
	    ``subtitles/<title>.vtt`` and only the seventh line of the file is
	    counted; all other tracks ("pure subs") are counted in full and not
	    written to disk.

	    Always terminates the process: status 2 after printing the result
	    (kept from the original script), status 1 when the duration or the
	    subtitles cannot be obtained.
	    """
	    # Exactly one argument is taken as the URL; anything else prompts.
	    if len(argv) == 1:
	        url = argv[0]
	    else:
	        url = input('Please input a video URL: ')

	    ydl_opts = {
	        'writesubtitles': True,
	        'writeautomaticsub': True,
	        'subtitlesformat': 'vtt',
	        'subtitleslangs': ['en'],
	        'noplaylist': True
	    }
	    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
	        info_dict = ydl.extract_info(url, download=False)
	    duration = (info_dict or {}).get('duration')
	    if not duration:
	        # A missing or zero duration would otherwise fail in the division.
	        print('Error : Video duration is unavailable, cannot compute words per minute.')
	        sys.exit(1)

	    subtitle = down_sub(url, 'en')
	    if not subtitle:
	        # down_sub returns None when there is no usable track.
	        print('Error : No English VTT subtitles found for this video.')
	        sys.exit(1)
	    sub_url, video_name = subtitle

	    try:
	        response = requests.get(sub_url, timeout=30)
	        response.raise_for_status()
	    except requests.RequestException as err:
	        print(f'Error : Could not download subtitles ({err}).')
	        sys.exit(1)

	    raw_subs = response.content
	    sub_text = raw_subs.decode("utf-8")
	    # Matches a cue timing such as "00:00:01.000 --> 00:00:03.000".
	    timestamp_re = r'\d{2}\W\d{2}\W\d{2}\W\d{3}\s\W{3}\s\d{2}\W\d{2}\W\d{2}\W\d{3}'

	    if "align:start" in sub_text:
	        # NOTE(review): presumably these are auto-generated captions - confirm.
	        os.makedirs('subtitles', exist_ok=True)
	        with open(os.path.join('subtitles', f"{video_name}.vtt"), 'wb') as f:
	            f.write(raw_subs)
	        print('\n Subtitle Downloaded Successfully.')

	        # NOTE(review): only the seventh line of the track is counted, as
	        # in the original script - confirm this heuristic is intended.
	        lines = sub_text.split('\n')
	        caption_line = lines[6] if len(lines) > 6 else ''
	        totalcount = len(re.sub(timestamp_re, '', caption_line).split())
	    else:
	        print('Pure Subs')
	        print('\n Subtitle Downloaded Successfully.')
	        without_timings = re.sub(timestamp_re, '', sub_text)
	        flat = ' '.join(without_timings.split())
	        flat = flat.replace('WEBVTT Kind: captions Language: en', '')
	        print(flat)
	        # split() without an argument ignores the blank left by the header.
	        totalcount = len(flat.split())

	    convert = time.strftime("%H:%M:%S", time.gmtime(duration))
	    print(totalcount, 'total words in HMS:', convert)
	    wpm = int(totalcount / (duration / 60))
	    print(wpm, 'ish wpm')
	    # NOTE(review): exit status 2 on success is kept for backward
	    # compatibility; 0 would be the conventional value.
	    sys.exit(2)

	# Run the command-line tool only when executed as a script, not on import.
	if __name__ == '__main__':
	    main()