-
-
Save pristanna/86e3c452195823005ff60c6376b113e2 to your computer and use it in GitHub Desktop.
Python Script to Download Springer Textbooks
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sort already-downloaded Springer textbook PDFs into one sub-folder per
# "English Package Name" (the category column of the catalogue spreadsheet).
import os
import shutil

import pandas as pd

CATALOG_PATH = "Free+English+textbooks.xlsx"
DOWNLOAD_DIR = "download"


def build_file_name(title, edition):
    """Return the PDF base name (without extension) for a catalogue row.

    '/' and ':' are replaced with '-' because they are not usable inside a
    file name.  This must stay in sync with whatever produced the downloaded
    files, otherwise the PDFs will not be found.
    """
    return f"{title}_{edition}".replace("/", "-").replace(":", "-")


def organize_downloads(df, download_dir=DOWNLOAD_DIR):
    """Move every ``<download_dir>/<name>.pdf`` into its category folder.

    Args:
        df: DataFrame with the columns "Book Title", "Edition" and
            "English Package Name".
        download_dir: Folder that holds the downloaded PDFs; the category
            folders are created inside it.

    Returns:
        The number of files that were moved.

    Problems with individual folders or files are reported on stdout and do
    not stop the run, so the script can be re-run after more downloads.
    """
    # Create the category folders up front.  makedirs(exist_ok=True) also
    # creates a missing download folder and is silent on re-runs.
    for category in df["English Package Name"].dropna().unique():
        target_dir = os.path.join(download_dir, str(category))
        try:
            os.makedirs(target_dir, exist_ok=True)
        except OSError as err:
            print(f"Could not create folder {target_dir}: {err}")

    moved = 0
    for _, row in df.iterrows():
        category = row.loc["English Package Name"]
        file_name = build_file_name(row.loc["Book Title"], row.loc["Edition"])
        if pd.isna(category):
            # No category cell -> there is no folder to move the file into.
            print(f"No category for {file_name}.pdf, leaving it in place")
            continue
        source = os.path.join(download_dir, f"{file_name}.pdf")
        target = os.path.join(download_dir, str(category), f"{file_name}.pdf")
        try:
            shutil.move(source, target)
        except FileNotFoundError:
            # Not downloaded yet, or already moved by an earlier run.
            print(f"File not found: {source}")
        except OSError as err:
            print(f"Could not move {source}: {err}")
        else:
            moved += 1
    return moved


if __name__ == "__main__":
    organize_downloads(pd.read_excel(CATALOG_PATH))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Resolve every book in the Springer catalogue spreadsheet to its direct PDF
# URL and print a ready-to-run `wget` command for it.
#
# The commands are printed instead of downloading in-process: the author
# found that neither the `wget` module nor `urllib.request.urlretrieve`
# worked for these URLs, so the files are fetched with the wget command-line
# tool afterwards.
import shlex
import sys

import pandas as pd

CATALOG_PATH = "Free+English+textbooks.xlsx"
REQUEST_TIMEOUT = 30  # seconds; keeps one dead link from hanging the run


def build_file_name(title, edition):
    """Return the PDF base name (without extension) for a catalogue row.

    '/' and ':' are replaced with '-' because they are not usable inside a
    file name.
    """
    return f"{title}_{edition}".replace("/", "-").replace(":", "-")


def pdf_url_from_book_url(book_url):
    """Turn a resolved ``.../book/<id>`` URL into ``.../content/pdf/<id>.pdf``.

    Only the first ``/book/`` path segment is rewritten, so an identifier
    that happens to contain the text "book" is left untouched.
    """
    return book_url.replace("/book/", "/content/pdf/", 1) + ".pdf"


def wget_command(download_url, file_name):
    """Return a shell-safe ``wget`` command saving the URL as ``<file_name>.pdf``.

    Both arguments go through ``shlex.quote`` so titles containing spaces,
    apostrophes or other shell metacharacters produce a valid command.
    """
    return f"wget {shlex.quote(download_url)} -O {shlex.quote(file_name + '.pdf')}"


def print_download_commands(df):
    """Print the PDF URL and a wget command for every row of the catalogue.

    Args:
        df: DataFrame with the columns "Book Title", "Edition" and "OpenURL".

    Rows whose link cannot be resolved are reported on stderr and skipped.
    """
    # Imported here so the pure helpers above stay importable on machines
    # without the requests package.
    import requests

    for _, row in df.iterrows():
        file_name = build_file_name(row.loc["Book Title"], row.loc["Edition"])
        url = f"{row.loc['OpenURL']}"
        try:
            # The OpenURL redirects; the final address is the book page.
            response = requests.get(url, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as err:
            print(f"Skipping {file_name}: {err}", file=sys.stderr)
            continue
        download_url = pdf_url_from_book_url(response.url)
        print(download_url)
        print(wget_command(download_url, file_name))


if __name__ == "__main__":
    catalog = pd.read_excel(CATALOG_PATH)
    print(catalog.head(10))
    print_download_commands(catalog)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Download All Free Textbooks from Springer using Python
A Step-by-Step Guide to Download Multiple Files Using Python
by Joe T. Santhanavanich
Article: https://towardsdatascience.com/download-all-free-textbooks-from-springer-using-python-bd0b10e0ccc
Script for classification: https://gist.github.com/juanluisrto/66e2bf157ab32719210cdcac2327f3a3