KernelPanicAUS · June 16, 2020 11:11
diff --git a/dl.py b/dl.py
 from lxml import html
 import requests
 import os

 # Index page that main() scrapes for book links.
 root_url = "https://hnarayanan.github.io/springer-books/"
 # Text of the <h2> heading on the index page whose books are downloaded.
 category = "Computer Science"
 # Target directory: <cwd>/downloads/<category, lower-cased, spaces -> "_">.
 # os.path.join avoids a doubled separator when the working directory is the
 # filesystem root.
 download_path = os.path.join(
    os.getcwd(), "downloads", category.lower().replace(" ", "_")
 )
 # Browser-like request headers sent with every file download.
 headers = {
    "Host": "link.springer.com",
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:77.0) Gecko/20100101 Firefox/77.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.5",
    # NOTE(review): requests only decodes "br" bodies when a brotli package
    # is installed -- confirm that is the case, or drop "br" here.
    "Accept-Encoding": "gzip, deflate, br",
    "Referer": "https://link.springer.com/",
    "DNT": "1",
    "Connection": "keep-alive",
    # NOTE(review): these look like session values captured from a browser;
    # they presumably expire and arguably should not be committed -- confirm.
    "Cookie": "sim-inst-token=1::1592330235412:4c812914; trackid=1897426dcf7a4d8f9c24344cd; recaptcha=8K1/HkRi4MLVzheCLwngJ5CMeCvMypHSbPY0yWv2KFc=",
    "Upgrade-Insecure-Requests": "1",
    "Pragma": "no-cache",
    "Cache-Control": "no-cache",
 }


 def download_file(url, filename):
    """Stream one download from link.springer.com into ``download_path``.

    Args:
        url: href of the download link, normally a site-relative path;
            an absolute URL is used as given.
        filename: base name of the local file, without extension. Any
            ``/`` is replaced with ``_`` so the name stays a single path
            component.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if the server stalls longer than the timeout.
    """
    from urllib.parse import urljoin, urlsplit

    # Take the extension from the URL path only, so a query string,
    # fragment, or dotted directory name never leaks into the file name.
    extension = os.path.splitext(urlsplit(url).path)[1].lstrip(".")
    local_filename = f"{download_path}/{filename.replace('/', '_')}"
    if extension:
        local_filename = f"{local_filename}.{extension}"

    # urljoin copes with hrefs that lack the leading slash or are absolute;
    # the timeout keeps a stalled connection from hanging the whole run.
    with requests.get(
        urljoin("https://link.springer.com", url),
        headers=headers,
        stream=True,
        timeout=30,
    ) as r:
        r.raise_for_status()
        with open(local_filename, "wb") as f:
            # Write in 8 KiB chunks so the file is never held in memory whole.
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)


 def extract_download_links(link):
    """Download every file offered on one book page.

    Fetches ``link``, reads the title from the page's ``page-title``
    heading and passes each download-button href to ``download_file``.
    Files are named after the lower-cased title with spaces replaced by
    underscores.

    Args:
        link: URL of the book page.
    """
    print(f"Visiting link {link}")

    # The timeout keeps one unresponsive page from hanging the crawl.
    book_page = html.fromstring(requests.get(link, timeout=30).content)
    titles = book_page.xpath('//div[@class="page-title"]/h1[1]/text()')
    if not titles:
        # Without a title there is no name to save under; skip this book
        # rather than abort the whole run with an IndexError.
        print(f"No title found at {link}, skipping")
        return
    title = titles[0]
    # Identical hrefs would be written to the same local file, so keep
    # only the first occurrence of each (order preserved).
    links = list(
        dict.fromkeys(
            book_page.xpath('//a[contains(@class,"c-button__icon-right")][*]/@href')
        )
    )
    print(f"Title [{title}]")

    print(links)
    print("=======================")
    base_name = title.lower().replace(" ", "_")
    for book_link in links:
        download_file(book_link, base_name)


 def main():
    """Download every book linked under the ``category`` heading.

    Fetches the index page at ``root_url``, makes sure ``download_path``
    exists, and processes each book link found in the row that follows
    the matching ``<h2>``.

    Raises:
        requests.HTTPError: if the index page cannot be fetched.
    """
    page = requests.get(root_url, timeout=30)
    # Fail loudly instead of parsing an error page and finding no links.
    page.raise_for_status()
    tree = html.fromstring(page.content)

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(download_path, exist_ok=True)

    links = tree.xpath(
        f"//h2[text() ='{category}']/following-sibling::div[@class='row mt-2'][1]/div[*]/div[@class='card mb-2']/div/a/@href"
    )
    for link in links:
        extract_download_links(link)


 # Start the download run only when executed as a script, not on import.
 if __name__ == "__main__":
    main()
	from lxml import html
	import requests
	import os

	# Index page that main() scrapes for book links.
	root_url = "https://hnarayanan.github.io/springer-books/"
	# Text of the <h2> heading on the index page whose books are downloaded.
	category = "Computer Science"
	# Target directory: <cwd>/downloads/<category, lower-cased, spaces -> "_">.
	# os.path.join avoids a doubled separator when the working directory is the
	# filesystem root.
	download_path = os.path.join(
	    os.getcwd(), "downloads", category.lower().replace(" ", "_")
	)
	# Browser-like request headers sent with every file download.
	headers = {
	    "Host": "link.springer.com",
	    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:77.0) Gecko/20100101 Firefox/77.0",
	    # The catch-all media range is "*/*"; the paste had lost the asterisks.
	    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
	    "Accept-Language": "en-US,en;q=0.5",
	    # NOTE(review): requests only decodes "br" bodies when a brotli package
	    # is installed -- confirm that is the case, or drop "br" here.
	    "Accept-Encoding": "gzip, deflate, br",
	    "Referer": "https://link.springer.com/",
	    "DNT": "1",
	    "Connection": "keep-alive",
	    # NOTE(review): these look like session values captured from a browser;
	    # they presumably expire and arguably should not be committed -- confirm.
	    "Cookie": "sim-inst-token=1::1592330235412:4c812914; trackid=1897426dcf7a4d8f9c24344cd; recaptcha=8K1/HkRi4MLVzheCLwngJ5CMeCvMypHSbPY0yWv2KFc=",
	    "Upgrade-Insecure-Requests": "1",
	    "Pragma": "no-cache",
	    "Cache-Control": "no-cache",
	}


	def download_file(url, filename):
	    """Stream one download from link.springer.com into ``download_path``.

	    Args:
	        url: href of the download link, normally a site-relative path;
	            an absolute URL is used as given.
	        filename: base name of the local file, without extension. Any
	            ``/`` is replaced with ``_`` so the name stays a single path
	            component.

	    Raises:
	        requests.HTTPError: if the server answers with an error status.
	        requests.Timeout: if the server stalls longer than the timeout.
	    """
	    from urllib.parse import urljoin, urlsplit

	    # Take the extension from the URL path only, so a query string,
	    # fragment, or dotted directory name never leaks into the file name.
	    extension = os.path.splitext(urlsplit(url).path)[1].lstrip(".")
	    local_filename = f"{download_path}/{filename.replace('/', '_')}"
	    if extension:
	        local_filename = f"{local_filename}.{extension}"

	    # urljoin copes with hrefs that lack the leading slash or are absolute;
	    # the timeout keeps a stalled connection from hanging the whole run.
	    with requests.get(
	        urljoin("https://link.springer.com", url),
	        headers=headers,
	        stream=True,
	        timeout=30,
	    ) as r:
	        r.raise_for_status()
	        with open(local_filename, "wb") as f:
	            # Write in 8 KiB chunks so the file is never held in memory whole.
	            for chunk in r.iter_content(chunk_size=8192):
	                f.write(chunk)


	def extract_download_links(link):
	    """Download every file offered on one book page.

	    Fetches ``link``, reads the title from the page's ``page-title``
	    heading and passes each download-button href to ``download_file``.
	    Files are named after the lower-cased title with spaces replaced by
	    underscores.

	    Args:
	        link: URL of the book page.
	    """
	    print(f"Visiting link {link}")

	    # The timeout keeps one unresponsive page from hanging the crawl.
	    book_page = html.fromstring(requests.get(link, timeout=30).content)
	    titles = book_page.xpath('//div[@class="page-title"]/h1[1]/text()')
	    if not titles:
	        # Without a title there is no name to save under; skip this book
	        # rather than abort the whole run with an IndexError.
	        print(f"No title found at {link}, skipping")
	        return
	    title = titles[0]
	    # Identical hrefs would be written to the same local file, so keep
	    # only the first occurrence of each (order preserved).
	    links = list(
	        dict.fromkeys(
	            book_page.xpath('//a[contains(@class,"c-button__icon-right")][*]/@href')
	        )
	    )
	    print(f"Title [{title}]")

	    print(links)
	    print("=======================")
	    base_name = title.lower().replace(" ", "_")
	    for book_link in links:
	        download_file(book_link, base_name)


	def main():
	    """Download every book linked under the ``category`` heading.

	    Fetches the index page at ``root_url``, makes sure ``download_path``
	    exists, and processes each book link found in the row that follows
	    the matching ``<h2>``.

	    Raises:
	        requests.HTTPError: if the index page cannot be fetched.
	    """
	    page = requests.get(root_url, timeout=30)
	    # Fail loudly instead of parsing an error page and finding no links.
	    page.raise_for_status()
	    tree = html.fromstring(page.content)

	    # exist_ok avoids the check-then-create race of a separate exists() test.
	    os.makedirs(download_path, exist_ok=True)

	    links = tree.xpath(
	        f"//h2[text() ='{category}']/following-sibling::div[@class='row mt-2'][1]/div[*]/div[@class='card mb-2']/div/a/@href"
	    )
	    for link in links:
	        extract_download_links(link)


	# Start the download run only when executed as a script, not on import.
	if __name__ == "__main__":
	    main()