Created
May 5, 2013 22:11
-
-
Save jrsmith3/5522397 to your computer and use it in GitHub Desktop.
Combining ideas from [getcites.py](https://gist.github.com/jrsmith3/5519665) and [doi2bib.py](https://gist.github.com/jrsmith3/5513926), generate a bibtex file of papers referenced by DOIs in the body of a LaTeX document.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import pycurl | |
import StringIO | |
from pybtex import auxfile | |
from pybtex.database.input import bibtex as bibtexin | |
from pybtex.database.output import bibtex as bibtexout | |
import pybtex.database | |
def doi2bib(doi):
    """
    Return a bibTeX string of metadata for a given DOI.

    The DOI is appended to ``http://dx.doi.org/`` and requested with an
    ``Accept: application/x-bibtex`` header; redirects are followed and
    the connection attempt times out after 5 seconds.

    :param doi: The DOI to look up (byte string or unicode string).
    :return: The raw response body as returned by the server.
    :raises pycurl.error: Propagated from ``perform()`` if the transfer
        fails.
    """
    # Build the URL
    url = "http://dx.doi.org/" + doi

    # pycurl does not accept Python 2 ``unicode`` objects, and ``str(url)``
    # raises UnicodeEncodeError as soon as the DOI contains a non-ASCII
    # character. Encode explicitly so such DOIs still reach the resolver.
    if not isinstance(url, bytes):
        url = url.encode("utf-8")

    # Create an object to write data to
    buf = StringIO.StringIO()

    # Create the curl object
    c = pycurl.Curl()
    try:
        # Set up the curl options
        c.setopt(pycurl.URL, url)
        c.setopt(pycurl.HTTPHEADER, ["Accept: application/x-bibtex"])
        c.setopt(pycurl.FOLLOWLOCATION, 1)
        c.setopt(pycurl.WRITEFUNCTION, buf.write)
        c.setopt(c.CONNECTTIMEOUT, 5)

        # Execute the curl request
        c.perform()

        return buf.getvalue()
    finally:
        # Always release the curl handle and the buffer, even when the
        # transfer raises.
        c.close()
        buf.close()
# The citation keys are taken from the aux file that LaTeX produces when the
# document is built; pybtex knows how to read that file, so the document must
# be compiled before this script runs.
aux_data = auxfile.parse_file("paper.aux", None)

# Fetch one bibTeX record per cited key.
cites = [doi2bib(cite) for cite in aux_data.citations]

# Parse the concatenated records into a bibtex database object.
parser = bibtexin.Parser()
bib_data = parser.parse_stream(StringIO.StringIO("\n".join(cites)))

# Re-key every entry by its "doi" field in a fresh, empty database.
doikey_db = pybtex.database.BibliographyData()
for key, en in bib_data.entries.items():
    doikey_db.add_entry(en.fields["doi"], en)

# Write the result to a new bibtex database.
w = bibtexout.Writer()
w.write_file(doikey_db, "bibtex.bib")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
One problem with this is that pycurl can't deal with Unicode strings, so a DOI containing non-ASCII characters will make the request fail.