Skip to content

Instantly share code, notes, and snippets.

@vikramsoni2
Last active December 16, 2022 16:31
Show Gist options
  • Save vikramsoni2/b8adb6f2e32d2e1a918b900a20b39af9 to your computer and use it in GitHub Desktop.
Save vikramsoni2/b8adb6f2e32d2e1a918b900a20b39af9 to your computer and use it in GitHub Desktop.
Streamlit app
import re
import urllib.request
from pathlib import Path
from urllib.parse import urljoin

import requests
import streamlit as st
from bs4 import BeautifulSoup
# Text box labelled 'URL' for the address of the page to scrape; starts empty.
URL = st.text_input('URL', '')
# Result of the 'DOWNLOAD' button; together with the URL length check it gates
# the download branch further down.
download = st.button('DOWNLOAD')
# NOTE(review): `time` is not referenced anywhere in the visible code.
import time
# Progress bar created at 0; advanced by show_progress() and by the download
# branch once a download starts.
my_bar = st.progress(0)
def show_progress(block_num, block_size, total_size):
    """Update the progress bar from a ``urllib.request.urlretrieve`` report hook.

    Args:
        block_num: Number of blocks transferred so far.
        block_size: Size of one block in bytes.
        total_size: Total size of the file in bytes; may be ``-1`` (or ``0``)
            when the server does not report a size.
    """
    if total_size <= 0:
        # Unknown size: a percentage cannot be computed, and dividing would
        # either raise ZeroDivisionError or yield a negative value.
        return

    percent_complete = round(((block_num * block_size) / total_size) * 100)
    # The final block usually overshoots total_size, and st.progress only
    # accepts integers in the range 0..100, so clamp before updating.
    my_bar.progress(max(0, min(percent_complete, 100)))
class _ScrapeError(Exception):
    """Raised when a page lacks an element the scraper needs."""


def _fetch_soup(url):
    """Download ``url`` and return it parsed with the built-in HTML parser.

    Raises:
        requests.RequestException: on connection problems, timeouts or a
            non-success HTTP status.
    """
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    return BeautifulSoup(response.content, 'html.parser')


def _absolute_url(link, base):
    """Return ``link`` as an absolute URL.

    Protocol-relative links (``//host/path``) are given an ``http:`` scheme;
    every other form is resolved against ``base``.
    """
    if link.startswith('//'):
        return "http:" + link
    return urljoin(base, link)


def _safe_filename(text):
    """Replace path separators, reserved and control characters; collapse whitespace."""
    cleaned = re.sub(r'[\\/:*?"<>|\x00-\x1f]', " ", text)
    return " ".join(cleaned.split())


def _build_filename(soup):
    """Build the output file name (without extension) from the page heading.

    The name is the letters-and-spaces-only title from ``h1.main-h1``,
    followed by `` - <text>`` of the first sibling-list item containing
    "featuring" when such an item exists.

    Raises:
        _ScrapeError: if the heading is missing.
    """
    heading = soup.find('h1', attrs={'class': 'main-h1'})
    if heading is None:
        raise _ScrapeError("Could not find the video title on the page.")

    title = re.sub(r"[^a-zA-Z ]", "", heading.text)
    item_list = heading.parent.ul
    items = item_list.find_all('li') if item_list is not None else []
    featuring = next((li.text for li in items if "featuring" in li.text), None)

    # Fall back to the bare title when no "featuring" entry is present.
    name = title if featuring is None else title + " - " + featuring
    return _safe_filename(name) or "video"


def _find_video_links(iframe_soup):
    """Return the ``src`` of every ``<source>`` inside the embedded ``<video>`` markup.

    The markup is taken from the first ``<script>`` whose text contains
    ``</video>``. Returns an empty list when nothing usable is found.
    """
    body = iframe_soup.find('body')
    if body is None:
        # html.parser does not synthesise a <body>; search the whole document.
        body = iframe_soup

    script = next(
        (s.text for s in body.find_all('script') if "</video>" in s.text),
        None,
    )
    if script is None:
        return []

    match = re.search("(?=<video).*(?=</video>)", script, re.IGNORECASE)
    if match is None:
        return []

    video_soup = BeautifulSoup(match.group(0), 'html.parser')
    # Strip the backslash-escaped quotes left around each attribute value.
    return [
        source.attrs['src'].replace('\\"', '')
        for source in video_soup.find_all('source')
        if source.attrs.get('src')
    ]


def _pick_target(links, qualities=('1080', '720')):
    """Return the first link matching the most preferred quality, else ``None``."""
    for quality in qualities:
        for link in links:
            if quality in link:
                return link
    return None


def _download_video(page_url):
    """Scrape ``page_url``, download its video into ``downloads/`` and return the file name.

    Raises:
        _ScrapeError: if the page does not have the expected structure.
        requests.RequestException: if a page cannot be fetched.
        OSError: if the video cannot be downloaded or written.
    """
    soup = _fetch_soup(page_url)
    filename = _build_filename(soup)

    # Target link: the player lives in an iframe inside #playerWrapper.
    player_wrapper = soup.find(name='div', attrs={'id': 'playerWrapper'})
    iframe = player_wrapper.find('iframe') if player_wrapper is not None else None
    if iframe is None or not iframe.attrs.get('src'):
        raise _ScrapeError("Could not find the embedded player on the page.")
    iframe_url = _absolute_url(iframe.attrs['src'], page_url)

    target = _pick_target(_find_video_links(_fetch_soup(iframe_url)))
    if target is None:
        raise _ScrapeError("No 1080 or 720 video source was found.")

    # urlretrieve does not create missing directories.
    downloads_dir = Path("downloads")
    downloads_dir.mkdir(parents=True, exist_ok=True)
    urllib.request.urlretrieve(
        _absolute_url(target, iframe_url),
        str(downloads_dir / f"{filename}.mp4"),
        show_progress,
    )
    return filename


if download and len(URL) > 10:
    my_bar.progress(1)
    try:
        saved_as = _download_video(URL)
    except (_ScrapeError, requests.RequestException, OSError) as err:
        # Show the problem in the app instead of a traceback or silence.
        st.error(f"Download failed: {err}")
    else:
        st.write(f"Done! file saved as {saved_as}")
beautifulsoup4==4.11.1
requests==2.28.1
streamlit==1.16.0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment