
@Akasurde
Created October 27, 2020 08:24
Scribd document download: the script below fetches a Scribd document page, extracts the per-page '.jsonp' URLs from the embedded scripts, rewrites them into image URLs, and saves each page as a JPEG in the current directory.
import requests
from bs4 import BeautifulSoup
import os
import shutil

# URL of the Scribd document page to download (left blank in the original gist).
url = ""
response = requests.get(url)
soup = BeautifulSoup(response.text, "html.parser")
def download_stream(url, filepath):
    """
    Stream stuff from the Internet to a local file.
    """
    response = requests.get(url, stream=True)
    with open(filepath, "wb") as out_file:
        shutil.copyfileobj(response.raw, out_file)
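# Note: with stream=True, requests defers downloading the body, and
# response.raw exposes the underlying urllib3 stream, so copyfileobj()
# writes the image to disk without holding it all in memory. If a server
# returns gzip-encoded content, setting response.raw.decode_content = True
# before copying is a commonly used workaround; it is not part of the
# original gist and plain JPEG responses do not need it.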
def _save_image(url, imagename):
    """
    Skips downloading if the image is already downloaded,
    otherwise downloads it locally.
    """
    print("Downloading", imagename)
    already_present = os.listdir(".")
    if imagename in already_present:
        return
    download_stream(url, imagename)
def _convert_jsonp_url_to_image_url(jsonp_url, found):
    """
    Gets the image URL corresponding to the '.jsonp' URL.
    """
    if jsonp_url.endswith(".jsonp"):
        replacement = jsonp_url.replace("/pages/", "/images/")
        if found:
            replacement = replacement.replace(".jsonp", "/000.jpg")
        else:
            replacement = replacement.replace(".jsonp", ".jpg")
    else:
        replacement = jsonp_url
    return replacement
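# Illustration (hypothetical URL, not taken from the gist): a page URL such as
#   https://html.scribdassets.com/abc123/pages/1-xyz.jsonp
# would be rewritten to
#   https://html.scribdassets.com/abc123/images/1-xyz.jpg
# or, when found is True, to
#   https://html.scribdassets.com/abc123/images/1-xyz/000.jpg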
def _extract_jsonp_url(inner_opening):
    """
    Extracts URLs ending with '.jsonp'. These URLs contain the
    raw document text.
    """
    portion1 = inner_opening.find("https://")
    if portion1 == -1:
        jsonp = None
    else:
        portion2 = inner_opening.find(".jsonp")
        jsonp = inner_opening[portion1 : portion2 + 6]  # 6 == len(".jsonp")
    return jsonp
# Each <script> tag's children are strings of JavaScript; scan them for
# the '.jsonp' page URLs embedded in the page.
js_text = soup.find_all("script", type="text/javascript")
jsonp_urls = []
for opening in js_text:
    for inner_opening in opening:
        jsonp = _extract_jsonp_url(inner_opening)
        if jsonp:
            jsonp_urls.append(jsonp)
page_counter = 1
initial_filename = "hacking_"  # filename prefix for the saved pages
found = False  # keep the plain '.jpg' form of the image URLs
downloaded_images = []
for jsonp_url in jsonp_urls:
    filename = "{}_{}.jpg".format(initial_filename, page_counter)
    img_url = _convert_jsonp_url_to_image_url(jsonp_url, found=found)
    # _save_image() returns None, so call it without reassigning img_url.
    _save_image(img_url, filename)
    downloaded_images.append(filename)
    page_counter += 1
print(jsonp_urls)
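# Usage note: fill in `url` at the top with a Scribd document page before
# running. Pages are saved in the working directory as '<prefix>_<n>.jpg'
# (e.g. 'hacking__1.jpg' with the prefix above), and downloaded_images
# collects the filenames in page order.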