Skip to content

Instantly share code, notes, and snippets.

@ngshaohui
Created January 1, 2025 10:23
Show Gist options
  • Save ngshaohui/188a674db088ac0634f1afe98949c16f to your computer and use it in GitHub Desktop.
Save ngshaohui/188a674db088ac0634f1afe98949c16f to your computer and use it in GitHub Desktop.
"""
script to download all media files from a fotoshare album
the page first needs to be downloaded as an html file with the browser or curl
using requests library to download the page encounters a cloudflare protection page
this script simply extracts the possible media URLs to be downloaded
"""
import re
import os
import requests
SAVE_LOCATION = "media"
PAGE_FILE = "page.html"
# Seconds to wait on the CDN before giving up on a single file; without a
# timeout one stalled connection would hang the whole run indefinitely.
REQUEST_TIMEOUT = 30

# Matches one media URL on the fotoshare CDN.
# - dots in the host name are escaped so they only match a literal "."
# - the path is limited to characters that cannot end an HTML attribute value,
#   so several URLs on the same (minified) line are matched separately instead
#   of being swallowed into one greedy match
# - the extension is 3-4 alphanumerics (jpg, png, gif, mp4, jpeg, webp, ...)
MEDIA_URL_PATTERN = re.compile(
    r"https://cdn-bz\.fotoshare\.co/b[^\s\"'<>]*\.[A-Za-z0-9]{3,4}")


def extract_media_urls(html):
    """Return the sorted, de-duplicated media URLs found in *html*.

    Args:
        html: Text of the saved album page.

    Returns:
        A list of unique URL strings in sorted order (empty if none match).
    """
    return sorted(set(MEDIA_URL_PATTERN.findall(html)))


def download_file(url, dest_dir):
    """Download *url* into *dest_dir*, named after the last URL path segment.

    Request failures are reported on stdout rather than raised, so one bad
    URL does not abort the remaining downloads.

    Args:
        url: Address of the media file.
        dest_dir: Existing directory the file is written to.

    Returns:
        True if the file was saved, False if the HTTP request failed.
    """
    # Get the file name from the URL
    file_name = url.split("/")[-1]
    output_path = os.path.join(dest_dir, file_name)
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()  # Check for HTTP request errors
    except requests.exceptions.RequestException as e:
        print(f"Failed to download {url}: {e}")
        return False
    # Write the content to a file
    with open(output_path, "wb") as file:
        file.write(response.content)
    print(f"Downloaded: {file_name}")
    return True


def main():
    """Read the saved album page and download every media file it references."""
    os.makedirs(SAVE_LOCATION, exist_ok=True)
    # Decode explicitly as UTF-8 instead of the platform default; the URLs of
    # interest are ASCII, so replacing undecodable bytes cannot corrupt them.
    with open(PAGE_FILE, "r", encoding="utf-8", errors="replace") as f:
        page_html = f.read()
    unique_urls = extract_media_urls(page_html)
    print(f"found {len(unique_urls)} urls")
    for url in unique_urls:
        download_file(url, SAVE_LOCATION)
    print("done")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment