Created
February 20, 2017 02:47
-
-
Save efraintorlo/5107ad09779bfb60bd01133f679b6df3 to your computer and use it in GitHub Desktop.
Retrieve BibTeX info from a PDF paper (works only if the DOI appears in the text of the paper).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re
import sys

from pyinspire import pyinspire
from PyPDF2 import PdfFileReader
# Default PDF to inspect; a different path may be given as the first
# command-line argument.
pdf_file = 'paper_from_prd.pdf'

# DOI pattern: the literal "10." prefix (the dot is escaped so that it cannot
# match an arbitrary character), a numeric registrant code, a slash, and then
# the suffix up to the first whitespace, angle bracket, double quote or
# parenthesis.
doi_re = re.compile(r'10\.\d+/[^\s<>"()]+')


def extract_doi(text):
    """Return the first DOI found in *text*, or None when there is none.

    Trailing '.', ',' and ';' characters are stripped from the match, since
    a DOI quoted at the end of a sentence or in a list would otherwise pick
    up the punctuation that follows it.
    """
    match = doi_re.search(text or '')
    if match is None:
        return None
    return match.group(0).rstrip('.,;')


def main(argv=None):
    """Print the DOI link and the INSPIRE BibTeX record for a PDF paper.

    Only the first page of the PDF is searched for a DOI.

    Parameters:
        argv: optional list of command-line arguments (without the program
            name).  Defaults to ``sys.argv[1:]``.  The first argument, if
            present, is the PDF path; otherwise ``pdf_file`` is used.

    Returns:
        0 on success, 1 when no DOI could be found on the first page.
    """
    args = sys.argv[1:] if argv is None else argv
    path = args[0] if args else pdf_file

    # open() works on both Python 2 and 3 (the file() builtin is Python 2
    # only); the text is extracted while the stream is still open because
    # the reader pulls pages from it on demand.
    with open(path, 'rb') as stream:
        reader = PdfFileReader(stream)
        text = reader.getPage(0).extractText()

    doi = extract_doi(text)
    if doi is None:
        sys.stderr.write("No DOI found on the first page of " + path + "\n")
        return 1

    print("The paper was originally published at:\nhttps://doi.org/" + doi)
    result = pyinspire.get_text_from_inspire("find doi " + doi,
                                             resultformat='bibtex')
    print(result)
    return 0


if __name__ == '__main__':
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment