Quick and dirty link scrape from a Wistia video
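Roughly: paste the embed snippet from the course page into tag_content, point path at an existing folder, make sure requests and beautifulsoup4 are available to the interpreter named in the shebang, then run the script and confirm the download at the prompt.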
#!/Users/username/.conda/envs/py3/bin/python
# Remove the first line if you don't want to use a conda Python env ("py3" in this case)
import requests
from bs4 import BeautifulSoup
import re
import urllib.parse
import urllib.request
import ntpath, sys
import time, datetime
# Paste the embed snippet copied from the Wistia player here (right click on the video, then copy the link)
tag_content = '''
<p><a href="https://website.com/full/link/to/lectures/11891982?wvideo=Aqn3c4exu7"><img src="https://embed-ssl.wistia.com/deliveries/6f4b905a9C6TC60fdac7752077be458b.jpg?image_crop_resized=800x450&image_play_button_size=2x&image_play_button=1&image_play_button_color=ff9a83e0" width="400" height="225" style="width: 400px; height: 225px;"></a></p><p><a href="https://website.com/link/to/lectures/11891982?wvideo=Aqn3c4exu7">Title of the Course | Category</a></p>
'''

# Path where the video will be saved
path = '/Users/username/where/to/save/Videos/'
def get_video_info():
    soup = BeautifulSoup(tag_content, features="html.parser")
    urls = soup.find_all('a', href=True)
    url = urls[-1]
    href = url['href']
    params = urllib.parse.urlparse(href)
    link = urllib.parse.parse_qs(params.query)
    return (link['wvideo'][0], url.get_text())

(video_id, link_text) = get_video_info()
print('VideoID: %s' % video_id)
print('Title: %s' % link_text)
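# For illustration, with the sample snippet above: urlparse() splits the last href and
# parse_qs('wvideo=Aqn3c4exu7') returns {'wvideo': ['Aqn3c4exu7']}, so video_id is
# 'Aqn3c4exu7' and link_text is 'Title of the Course | Category'.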
| url = "http://fast.wistia.net/embed/iframe/"+video_id | |
| name = re.sub(r'[\\/*?:"<>|]','_', link_text.split('|')[0].strip()) # remove unwanted chars | |
| response = requests.get(url) | |
| soup = BeautifulSoup(response.content, features="html.parser") | |
| script = soup.find_all('script')[-1] | |
| # grab the last script | |
| page = str(script.contents[0]) | |
def sizeof_file(num, suffix='B'):
    for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']:
        if abs(num) < 1024.0:
            return "%3.1f%s%s" % (num, unit, suffix)
        num /= 1024.0
    return "%.1f%s%s" % (num, 'Yi', suffix)
def size_on_disk(filename):
    with open(filename, 'rb') as f:
        return len(f.read())
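# Note: reading the whole file just to measure it is heavy for large videos; a lighter
# alternative (a sketch, same result in bytes) would be:
#     import os
#     os.path.getsize(filename)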
# Report hook for download progress
def reporthook(blocknum, blocksize, totalsize):
    global start_time
    global filesize
    if blocknum == 0:
        start_time = time.time()
        filesize = sizeof_file(totalsize)
        return
    duration = max(time.time() - start_time, 1e-6)  # avoid division by zero right after the first block
    elapsed_time = str(datetime.timedelta(seconds=int(duration)))
    readsofar = int(blocknum * blocksize)
    speed = max(int(readsofar / (1024 * duration)), 1)  # KB/s, at least 1 so the ETA stays finite
    estimated_seconds = int(totalsize / (speed * 1024))
    estimated_duration = str(datetime.timedelta(seconds=estimated_seconds))
    if totalsize > 0:
        percent = readsofar * 1e2 / totalsize
        s = "\r%5.1f%% %s / %s, %d KB/s, %s estimated, %s elapsed " % (
            percent, sizeof_file(readsofar), filesize, speed, estimated_duration, elapsed_time)
        sys.stderr.write(s)
        if readsofar >= totalsize:  # near the end
            sys.stderr.write("\n")
    else:  # total size is unknown
        sys.stderr.write("read %d\n" % (readsofar,))
if 'embed.wistia.com/deliveries' in page:
    urls = re.findall(r'((([A-Za-z]{3,9}:(?:\/\/)?)(?:[-;:&=\+\$,\w]+@)?[A-Za-z0-9.-]+|(?:www.|[-;:&=\+\$,\w]+@)[A-Za-z0-9.-]+)((?:\/[\+~%\/.\w_]*)?\??(?:[-\+=&;%@.\w_]*)#?(?:[\w]*))?)', page)
    if len(urls) > 0:
        print('Found: %d links.' % len(urls))
        fileurl = str(urls[0][0])
        print(fileurl.replace('.bin', '.mp4'))
        network_obj = urllib.request.urlopen(fileurl)
        filesize = sizeof_file(int(network_obj.info()['Content-Length']))
        print('File size: %s' % filesize)
        sys.stdout.write('Do you want to proceed with the download (y/n)? ')
        if input().strip().lower() in ['y', 'yes']:
            # Download the file
            # filepath = path + ntpath.basename(fileurl.replace('.bin', '.mp4'))
            filepath = path + name + '.mp4'
            urllib.request.urlretrieve(fileurl, filepath, reporthook)
            print('Saved to: %s' % filepath)
            print('File size on disk: %s' % sizeof_file(size_on_disk(filepath)))
        else:
            print('Download aborted.')
    else:
        print('Could not find any URL.')
else:
    print("Page doesn't contain any URL:")
    print(page)