-
-
Save 4e1e0603/c33bb6b06e43383c2b0fcd72eda1723b to your computer and use it in GitHub Desktop.
Rename an academic article PDF to a human-readable file name
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import requests | |
import PyPDF2 | |
import requests | |
import os | |
# Base URL of the Crossref REST API (trailing slash required: endpoint paths
# such as 'works/<doi>' are appended directly). HTTPS is used so queries are
# not sent in clear text.
crossref = 'https://api.crossref.org/'
def _find_doi(info):
    """Return the DOI stored in the PDF metadata mapping, or None if absent."""
    if not info:
        return None
    entries = list(info.items())
    # First pass keeps the original case-sensitive rule so the same key wins
    # whenever it used to match.
    for key, value in entries:
        if 'doi' in key:
            return str(value).strip()
    # Second pass also accepts keys such as '/DOI' or '/WPS-ARTICLEDOI'.
    for key, value in entries:
        if 'doi' in key.lower():
            return str(value).strip()
    return None


def _publication_year(message):
    """Return the year of a Crossref work record, preferring the issue date."""
    # 'created' is when the DOI was registered, which for back-catalogue
    # articles can be decades after publication; 'issued' is the publication
    # date, so it is tried first and 'created' is only a fallback.
    for field in ('issued', 'created'):
        date_parts = (message.get(field) or {}).get('date-parts') or [[None]]
        if date_parts[0] and date_parts[0][0]:
            return date_parts[0][0]
    raise KeyError("no usable 'issued' or 'created' date in Crossref record")


def rename(pdf):
    """Rename an academic article pdf file with human readable format

    The DOI is read from the PDF document information, the article is looked
    up on Crossref, and the file is renamed (inside its own directory) to
    'abbreviated_journal year, volume, page.pdf'.

    Arguments:
        pdf {String} -- pdf file name

    Raises:
        ValueError -- the PDF metadata holds no key containing 'doi'
        requests.HTTPError -- Crossref answered with an error status
        KeyError, IndexError -- the Crossref record lacks journal title,
            date, volume or page (typical for very new articles)
        FileExistsError -- a different file already has the target name
    """
    # PyPDF2 >= 2 provides PdfReader/.metadata; older releases only provide
    # PdfFileReader/.documentInfo, so fall back when the new names are absent.
    reader_cls = getattr(PyPDF2, 'PdfReader', None) or PyPDF2.PdfFileReader
    # Metadata is parsed lazily from the stream, so read it before the file
    # is closed; closing also lets os.rename succeed on Windows.
    with open(pdf, 'rb') as stream:
        reader = reader_cls(stream)
        if hasattr(reader, 'metadata'):
            info = reader.metadata
        else:
            info = reader.documentInfo
        doi = _find_doi(info)
    if not doi:
        raise ValueError('no DOI found in the metadata of {!r}'.format(pdf))

    url = '{}works/{}'.format(crossref, doi)
    response = requests.get(url, timeout=30)
    # Unknown DOIs yield a non-JSON 404 body; fail with a clear HTTP error.
    response.raise_for_status()
    message = response.json()['message']

    # Many journals have an empty short title list; use the full title then.
    titles = message.get('short-container-title') or message['container-title']
    abbreviated_journal = titles[0]
    year = _publication_year(message)
    volume = message['volume']
    page = message['page']

    # format: abbreviated_journal year, volume, page.pdf
    name = '{} {}, {}, {}'.format(abbreviated_journal, year, volume, page)
    # A path separator inside the metadata would point into another directory.
    for separator in (os.sep, os.altsep):
        if separator:
            name = name.replace(separator, '-')

    # Keep the renamed file next to the original instead of the current
    # working directory (identical when a bare file name is given).
    target = os.path.join(os.path.dirname(pdf), name + '.pdf')
    # os.rename silently replaces an existing file on POSIX; refuse to
    # clobber another article, but allow re-running on an already-renamed one.
    if os.path.exists(target) and not os.path.samefile(pdf, target):
        raise FileExistsError('target already exists: {!r}'.format(target))
    os.rename(pdf, target)
if __name__ == '__main__':
    # The PDF path is the first command-line argument; exit with a usage
    # message instead of an IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit('usage: {} <article.pdf>'.format(sys.argv[0]))
    rename(sys.argv[1])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment