Skip to content

Instantly share code, notes, and snippets.

@paulwababu
Created March 15, 2023 20:17
Show Gist options
  • Save paulwababu/b185dfdd37b4d71930c9d1b4f341cc40 to your computer and use it in GitHub Desktop.
Save paulwababu/b185dfdd37b4d71930c9d1b4f341cc40 to your computer and use it in GitHub Desktop.
import urllib.request
from urllib.parse import urljoin
from urllib.parse import urlsplit

from bs4 import BeautifulSoup
# Page to scrape: point base_url / html_file at the HTML document whose linked
# assets (stylesheets, scripts, images) should be saved locally.
base_url = "https://themes.3rdwavemedia.com/profolio/bs5/"
html_file = "index.html"
url = urljoin(base_url, html_file)

# Only links whose URL path ends in one of these extensions are downloaded.
ASSET_EXTENSIONS = frozenset({'css', 'js', 'png', 'jpg', 'jpeg'})

# Fetch the page; the context manager closes the HTTP response once it is read.
with urllib.request.urlopen(url) as response:
    html_content = response.read()
soup = BeautifulSoup(html_content, 'html.parser')

# Find all elements with a 'src' or 'href' attribute
elements_with_links = soup.find_all(
    lambda tag: tag.has_attr('src') or tag.has_attr('href')
)

# Download every whitelisted asset into the current working directory.
# NOTE: files are named after the last URL path segment only, so two assets
# with the same basename in different directories overwrite each other.
downloaded_urls = set()
for element in elements_with_links:
    # 'src' takes precedence when an element carries both attributes.
    link = element['src'] if element.has_attr('src') else element['href']

    # Resolve relative links against the page itself, not just base_url, so
    # pages located in a subdirectory still resolve their assets correctly.
    full_url = urljoin(url, link)

    # Derive the filename and extension from the URL *path* so that query
    # strings and fragments ("app.js?v=3", "style.css#x") neither defeat the
    # extension check nor end up in the saved filename.
    filename = urlsplit(full_url).path.rsplit('/', 1)[-1]
    _, dot, ext = filename.rpartition('.')
    if not dot or ext.lower() not in ASSET_EXTENSIONS:
        continue

    # The same asset is often referenced by several elements; fetch it once.
    if full_url in downloaded_urls:
        continue
    downloaded_urls.add(full_url)

    try:
        urllib.request.urlretrieve(full_url, filename)
    except Exception as e:
        # Deliberately broad: this is a best-effort bulk download, so any
        # single failure (HTTP error, malformed URL, disk error) is reported
        # and the loop moves on to the next asset.
        print(f"Error downloading from URL: {full_url}. {e}")
    else:
        print("Downloaded from URL:", full_url)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment