deeplook · April 29, 2020 19:49
diff --git a/fetch_springer_ebooks.py b/fetch_springer_ebooks.py
 #!/usr/bin/env python3

 """
 Download free Springer ebooks.

 Examples:

  python fetch_springer_ebooks.py python
  python fetch_springer_ebooks.py --dest my/ebooks math
  python fetch_springer_ebooks.py
  python fetch_springer_ebooks.py -h

 Dependencies:
  - Firefox
  - geckodriver
  - requests
  - selenium
  - python-slugify
 """


 import argparse
 import os

 import requests
 from selenium import webdriver
 from selenium.webdriver.firefox.options import Options
 from slugify import slugify


 def download(args):
     """Download the PDFs of all books matching a query from Springer Link.

     Opens a headless Firefox session, loads the Springer Link book search
     restricted to the ``mat-covid19_textbooks`` package, submits
     ``args.query`` in the search box and walks the result pages through
     the "next" link. For every title link the PDF URL is derived from the
     book URL and the file is stored in ``args.dest`` as
     ``<pdf-basename>-<slugified-title>.pdf``. Files that already exist are
     not downloaded again.

     Args:
         args: Namespace-like object with the attributes ``query`` (search
             text, may be empty) and ``dest`` (destination folder, created
             if it does not exist).

     Raises:
         requests.RequestException: If a PDF request fails at the network
             level (including the timeout).
     """
     # Function-scope imports keep this block self-contained; selenium is
     # already a dependency of the module.
     from selenium.common.exceptions import (
         NoSuchElementException,
         WebDriverException,
     )
     from selenium.webdriver.common.by import By

     dest = args.dest
     query = args.query
     os.makedirs(dest, exist_ok=True)

     options = Options()
     # Command-line switch instead of the `headless` property, which newer
     # Selenium releases no longer provide.
     options.add_argument("-headless")
     # geckodriver is looked up on PATH by default.
     driver = webdriver.Firefox(options=options)
     try:
         # The main site is visited first, as in the original flow
         # (presumably to pick up cookies -- TODO confirm it is required).
         driver.get("https://springer.com")
         driver.get(
             "https://link.springer.com/search"
             "?facet-content-type=%22Book%22&package=mat-covid19_textbooks"
         )
         input_el = driver.find_element(By.XPATH, '//*[@id="query"]')
         input_el.clear()
         input_el.send_keys(query)
         driver.find_element(By.XPATH, '//*[@id="search"]').click()

         page = 0
         i = 0  # running book counter across all result pages
         while True:
             print(f"page {page}")
             for book in driver.find_elements(By.XPATH, '//h2/a[@class="title"]'):
                 link = book.get_attribute("href")
                 url = link.replace("springer.com/book", "springer.com/content/pdf") + ".pdf"
                 base, ext = os.path.splitext(os.path.basename(url))
                 fname = f"{base}-{slugify(book.text)}{ext}"
                 path = os.path.join(dest, fname)
                 print(i, fname)
                 if not os.path.exists(path):
                     # Fetch before opening the file so a failed request
                     # cannot leave an empty file that would be skipped
                     # on the next run.
                     response = requests.get(url, timeout=60)
                     if response.ok:
                         with open(path, "wb") as f:
                             f.write(response.content)
                     else:
                         print(f"  skipped: HTTP {response.status_code} for {url}")
                 i += 1
             # NOTE(review): there is no explicit wait after the click; this
             # relies on the driver blocking until the next page is loaded.
             try:
                 next_link = driver.find_element(By.XPATH, '//a[@class="next"]')
                 next_link.click()
             except NoSuchElementException:
                 # No "next" link: this was the last result page.
                 break
             except WebDriverException as exc:
                 # Any other browser-side failure also ends pagination,
                 # as before, but is now reported instead of swallowed.
                 print(f"stopping pagination: {exc}")
                 break
             page += 1
     finally:
         # quit() ends the whole session (browser and geckodriver), also
         # when an error interrupts the run.
         driver.quit()


 if __name__ == "__main__":
     # Command-line entry point: parse the arguments and start the download.
     desc = "Download free Springer ebooks."
     p = argparse.ArgumentParser(description=desc)

     # nargs="?" makes the positional optional; without it argparse ignores
     # default="" and rejects an invocation that gives no query.
     p.add_argument('query', metavar="TEXT", nargs="?", default="",
         help='Query to search books, example: "python", default: "".')
     p.add_argument('--dest', metavar='PATH', default=".",
         help='Existing destination folder, default: "."')

     args = p.parse_args()
     download(args)
	#!/usr/bin/env python3

	"""
	Download free Springer ebooks.

	Examples:

	python fetch_springer_ebooks.py python
	python fetch_springer_ebooks.py --dest my/ebooks math
	python fetch_springer_ebooks.py
	python fetch_springer_ebooks.py -h

	Dependencies:
	- Firefox
	- geckodriver
	- requests
	- selenium
	- python-slugify
	"""


	import argparse
	import os

	import requests
	from selenium import webdriver
	from selenium.webdriver.firefox.options import Options
	from slugify import slugify


	def download(args):
	    """Download the PDFs of all books matching a query from Springer Link.

	    Opens a headless Firefox session, loads the Springer Link book search
	    restricted to the ``mat-covid19_textbooks`` package, submits
	    ``args.query`` in the search box and walks the result pages through
	    the "next" link. For every title link the PDF URL is derived from the
	    book URL and the file is stored in ``args.dest`` as
	    ``<pdf-basename>-<slugified-title>.pdf``. Files that already exist are
	    not downloaded again.

	    Args:
	        args: Namespace-like object with the attributes ``query`` (search
	            text, may be empty) and ``dest`` (destination folder, created
	            if it does not exist).

	    Raises:
	        requests.RequestException: If a PDF request fails at the network
	            level (including the timeout).
	    """
	    # Function-scope imports keep this block self-contained; selenium is
	    # already a dependency of the module.
	    from selenium.common.exceptions import (
	        NoSuchElementException,
	        WebDriverException,
	    )
	    from selenium.webdriver.common.by import By

	    dest = args.dest
	    query = args.query
	    os.makedirs(dest, exist_ok=True)

	    options = Options()
	    # Command-line switch instead of the `headless` property, which newer
	    # Selenium releases no longer provide.
	    options.add_argument("-headless")
	    # geckodriver is looked up on PATH by default.
	    driver = webdriver.Firefox(options=options)
	    try:
	        # The main site is visited first, as in the original flow
	        # (presumably to pick up cookies -- TODO confirm it is required).
	        driver.get("https://springer.com")
	        driver.get(
	            "https://link.springer.com/search"
	            "?facet-content-type=%22Book%22&package=mat-covid19_textbooks"
	        )
	        input_el = driver.find_element(By.XPATH, '//*[@id="query"]')
	        input_el.clear()
	        input_el.send_keys(query)
	        driver.find_element(By.XPATH, '//*[@id="search"]').click()

	        page = 0
	        i = 0  # running book counter across all result pages
	        while True:
	            print(f"page {page}")
	            for book in driver.find_elements(By.XPATH, '//h2/a[@class="title"]'):
	                link = book.get_attribute("href")
	                url = link.replace("springer.com/book", "springer.com/content/pdf") + ".pdf"
	                base, ext = os.path.splitext(os.path.basename(url))
	                fname = f"{base}-{slugify(book.text)}{ext}"
	                path = os.path.join(dest, fname)
	                print(i, fname)
	                if not os.path.exists(path):
	                    # Fetch before opening the file so a failed request
	                    # cannot leave an empty file that would be skipped
	                    # on the next run.
	                    response = requests.get(url, timeout=60)
	                    if response.ok:
	                        with open(path, "wb") as f:
	                            f.write(response.content)
	                    else:
	                        print(f"  skipped: HTTP {response.status_code} for {url}")
	                i += 1
	            # NOTE(review): there is no explicit wait after the click; this
	            # relies on the driver blocking until the next page is loaded.
	            try:
	                next_link = driver.find_element(By.XPATH, '//a[@class="next"]')
	                next_link.click()
	            except NoSuchElementException:
	                # No "next" link: this was the last result page.
	                break
	            except WebDriverException as exc:
	                # Any other browser-side failure also ends pagination,
	                # as before, but is now reported instead of swallowed.
	                print(f"stopping pagination: {exc}")
	                break
	            page += 1
	    finally:
	        # quit() ends the whole session (browser and geckodriver), also
	        # when an error interrupts the run.
	        driver.quit()


	if __name__ == "__main__":
	    # Command-line entry point: parse the arguments and start the download.
	    desc = "Download free Springer ebooks."
	    p = argparse.ArgumentParser(description=desc)

	    # nargs="?" makes the positional optional; without it argparse ignores
	    # default="" and rejects an invocation that gives no query.
	    p.add_argument('query', metavar="TEXT", nargs="?", default="",
	        help='Query to search books, example: "python", default: "".')
	    p.add_argument('--dest', metavar='PATH', default=".",
	        help='Existing destination folder, default: "."')

	    args = p.parse_args()
	    download(args)