Skip to content

Instantly share code, notes, and snippets.

@gabriel19913
Created April 7, 2020 01:07
Show Gist options
  • Save gabriel19913/82a35e0d6e68a29434b167989196f9e0 to your computer and use it in GitHub Desktop.
Save gabriel19913/82a35e0d6e68a29434b167989196f9e0 to your computer and use it in GitHub Desktop.
from urllib.parse import urljoin
from urllib.request import urlretrieve

import pandas as pd
import requests
from bs4 import BeautifulSoup as bs
# Download every book listed in the Springer catalogue CSV as a PDF.
#
# For each 'OpenURL' entry the script fetches the book's landing page, scrapes
# the PDF download link and the page title from the HTML, and saves the PDF
# into `books_path` under the book's title. A book whose page cannot be
# fetched or parsed, or whose PDF cannot be downloaded, is reported and
# skipped so that one bad entry neither aborts the batch nor reuses the link
# or title left over from the previous book.

# Site root that the scraped download hrefs are resolved against.
base_url = 'https://link.springer.com/'
# Folder that holds the catalogue CSV and receives the downloaded PDFs
# (presumably a Google Drive mount inside a notebook -- confirm for your setup).
books_path = '/content/drive/My Drive/Springer Books/'
books = pd.read_csv(books_path + 'Springer Free Books - Data, Stats, Math & Tech.csv')
books.head()  # only displays a preview when run as a notebook cell
links = books['OpenURL']
array_links = links.values  # not used by the loop below; kept as a module-level name
count = 0  # number of books saved so far
for link in links:
    try:
        page = requests.get(link)
        # Treat HTTP error statuses as failures instead of parsing an error page.
        page.raise_for_status()
    except requests.RequestException as err:
        print(f"Skipping {link}: could not fetch the book page ({err})")
        continue

    soup = bs(page.text, 'html.parser')

    # `find` returns None when the element is missing, so a missing button or
    # anchor surfaces as AttributeError and a missing href as KeyError.
    try:
        href = soup.find(class_='cta-button-container__item').a.attrs['href']
    except (AttributeError, KeyError):
        print(f"Skipping {link}: no download link found on the page")
        continue
    # urljoin copes with root-relative hrefs (no doubled '/') and absolute ones.
    down_link = urljoin(base_url, href)

    try:
        title = soup.find(class_='page-title').h1.text
    except AttributeError:
        print(f"Skipping {link}: no page title found to name the file")
        continue
    # '/' would be interpreted as a directory separator in the target path.
    title = title.replace('/', ' ')

    try:
        urlretrieve(down_link, books_path + title + '.pdf')
    except OSError as err:
        # Covers urllib's URLError/HTTPError as well as local file-system errors.
        print(f"Skipping {title}: download failed ({err})")
        continue

    print(count)
    print(f"Book {title} saved on Google Drive")
    count += 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment