Created
April 29, 2020 19:49
-
-
Save deeplook/a31ecd29523b965d7957ec66308471d4 to your computer and use it in GitHub Desktop.
Fetch free Springer ebooks.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""
Download free Springer ebooks.

Examples:
    python fetch_springer_ebooks python
    python fetch_springer_ebooks --dest my/ebooks math
    python fetch_springer_ebooks
    python fetch_springer_ebooks -h

Dependencies:
    - Firefox
    - geckodriver
    - requests
    - selenium
    - python-slugify
"""
import argparse
import os

import requests
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options
from slugify import slugify
def _save_pdf(url, path, timeout=60):
    """Fetch *url* and write the response body to *path*.

    The request is completed and its status checked before the file is
    opened, so a failed download never leaves an empty or bogus file behind
    (which a later run would mistake for an already-downloaded book).
    Request failures are reported and skipped rather than raised, so one
    unavailable book does not abort the whole run.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as exc:
        print(f"  download failed, skipping: {exc}")
        return
    with open(path, "wb") as f:
        f.write(response.content)


def download(args):
    """Download the PDFs of all books matching a search query.

    Drives a headless Firefox through the Springer Link search results
    (restricted to the ``mat-covid19_textbooks`` package), walks every
    result page, and saves each listed book as
    ``<id>-<slugified-title>.pdf`` inside the destination folder.  Files
    that already exist are not downloaded again.

    Args:
        args: Object with two attributes: ``query`` (the search text, may be
            empty) and ``dest`` (destination folder; created if missing).
    """
    dest = args.dest
    query = args.query
    os.makedirs(dest, exist_ok=True)

    options = Options()
    # Passing the flag directly works on Selenium 3 and 4 alike; the
    # ``options.headless`` property was removed in later Selenium 4 releases.
    options.add_argument("-headless")
    # geckodriver is looked up on PATH by default, so no explicit
    # ``executable_path`` (removed in Selenium 4) is needed.
    driver = webdriver.Firefox(options=options)
    try:
        # The landing page is visited first, as in the original script;
        # presumably to establish a session before searching (unverified).
        driver.get("https://springer.com")
        driver.get(
            "https://link.springer.com/search"
            "?facet-content-type=%22Book%22&package=mat-covid19_textbooks"
        )
        input_el = driver.find_element(By.XPATH, '//*[@id="query"]')
        input_el.clear()
        input_el.send_keys(query)
        driver.find_element(By.XPATH, '//*[@id="search"]').click()

        page = 0
        i = 0  # running index over all books seen, across pages
        while True:
            print(f"page {page}")
            # Read link data up front so the element handles are not held
            # across the (slow) downloads.
            books = [
                (anchor.get_attribute("href"), anchor.text)
                for anchor in driver.find_elements(
                    By.XPATH, '//h2/a[@class="title"]')
            ]
            for link, text in books:
                # A book page URL maps onto its PDF by swapping the path
                # prefix and appending ".pdf".
                url = link.replace(
                    "springer.com/book", "springer.com/content/pdf") + ".pdf"
                base, ext = os.path.splitext(os.path.basename(url))
                fname = f"{base}-{slugify(text)}{ext}"
                path = os.path.join(dest, fname)
                print(i, fname)
                if not os.path.exists(path):
                    _save_pdf(url, path)
                i += 1

            # A missing "next" link marks the last result page.
            try:
                next_link = driver.find_element(By.XPATH, '//a[@class="next"]')
            except NoSuchElementException:
                break
            next_link.click()
            page += 1
    finally:
        # quit() ends the browser and the geckodriver process even when an
        # error interrupts the run.
        driver.quit()
if __name__ == "__main__":
    # Command-line entry point: parse the arguments and run the download.
    desc = "Download free Springer ebooks."
    p = argparse.ArgumentParser(description=desc)
    # nargs="?" makes the positional optional. Without it argparse ignores
    # ``default`` and exits with a usage error when the query is omitted.
    p.add_argument('query', metavar="TEXT", nargs="?", default="",
                   help='Query to search books, example: "python", default: "".')
    p.add_argument('--dest', metavar='PATH', default=".",
                   help='Existing destination folder, default: "."')
    args = p.parse_args()
    download(args)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment