ngshaohui · January 1, 2025 10:23
diff --git a/fotoshare.py b/fotoshare.py
 """
 script to download all media files from a fotoshare album

 the page must first be saved as an HTML file using a browser or curl

 using requests library to download the page encounters a cloudflare protection page

 this script simply extracts the possible media URLs to be downloaded
 """

 import re
 import os

 import requests

 # Directory (relative to the working directory) that receives the downloads.
 SAVE_LOCATION = "media"

 # Seconds to wait on connect/read before giving up on a single download.
 # Without a timeout, requests will wait forever on a stalled connection.
 REQUEST_TIMEOUT = 30

 # Matches a media URL on the fotoshare CDN whose path starts with "b" and
 # ends in a 3-4 character extension.  The URL body excludes whitespace,
 # quotes, angle brackets, "&", parentheses and backslashes so that one match
 # cannot run past the end of an attribute value and swallow several URLs
 # that happen to share a line of HTML.
 MEDIA_URL_PATTERN = re.compile(
     r"https://cdn-bz\.fotoshare\.co/b[^\s\"'<>&()\\]*\.\w{3,4}"
 )

 # exist_ok avoids the check-then-create race of a separate os.path.exists().
 os.makedirs(SAVE_LOCATION, exist_ok=True)

 # Read the saved album page; the handle is closed before any download starts.
 with open("page.html", "r", encoding="utf-8") as f:
     st = f.read()

 media_urls = MEDIA_URL_PATTERN.findall(st)
 # De-duplicate, and sort so that repeated runs download in the same order.
 unique_urls = sorted(set(media_urls))
 print(f"found {len(unique_urls)} urls")

 for url in unique_urls:
     # The last path segment of the URL becomes the local file name.
     file_name = url.split("/")[-1]
     output_path = os.path.join(SAVE_LOCATION, file_name)

     try:
         response = requests.get(url, timeout=REQUEST_TIMEOUT)
         response.raise_for_status()  # turn HTTP 4xx/5xx into an exception
     except requests.exceptions.RequestException as e:
         # Best effort: report the failure and carry on with the next URL.
         print(f"Failed to download {url}: {e}")
         continue

     with open(output_path, "wb") as file:
         file.write(response.content)

     print(f"Downloaded: {file_name}")

 print("done")
	"""
	script to download all media files from a fotoshare album

	the page must first be saved as an HTML file using a browser or curl

	using requests library to download the page encounters a cloudflare protection page

	this script simply extracts the possible media URLs to be downloaded
	"""

	import re
	import os

	import requests

	# Directory (relative to the working directory) that receives the downloads.
	SAVE_LOCATION = "media"

	# Seconds to wait on connect/read before giving up on a single download.
	# Without a timeout, requests will wait forever on a stalled connection.
	REQUEST_TIMEOUT = 30

	# Matches a media URL on the fotoshare CDN whose path starts with "b" and
	# ends in a 3-4 character extension.  The URL body excludes whitespace,
	# quotes, angle brackets, "&", parentheses and backslashes so that one match
	# cannot run past the end of an attribute value and swallow several URLs
	# that happen to share a line of HTML.
	MEDIA_URL_PATTERN = re.compile(
	    r"https://cdn-bz\.fotoshare\.co/b[^\s\"'<>&()\\]*\.\w{3,4}"
	)

	# exist_ok avoids the check-then-create race of a separate os.path.exists().
	os.makedirs(SAVE_LOCATION, exist_ok=True)

	# Read the saved album page; the handle is closed before any download starts.
	with open("page.html", "r", encoding="utf-8") as f:
	    st = f.read()

	media_urls = MEDIA_URL_PATTERN.findall(st)
	# De-duplicate, and sort so that repeated runs download in the same order.
	unique_urls = sorted(set(media_urls))
	print(f"found {len(unique_urls)} urls")

	for url in unique_urls:
	    # The last path segment of the URL becomes the local file name.
	    file_name = url.split("/")[-1]
	    output_path = os.path.join(SAVE_LOCATION, file_name)

	    try:
	        response = requests.get(url, timeout=REQUEST_TIMEOUT)
	        response.raise_for_status()  # turn HTTP 4xx/5xx into an exception
	    except requests.exceptions.RequestException as e:
	        # Best effort: report the failure and carry on with the next URL.
	        print(f"Failed to download {url}: {e}")
	        continue

	    with open(output_path, "wb") as file:
	        file.write(response.content)

	    print(f"Downloaded: {file_name}")

	print("done")