Last active
December 20, 2015 06:49
-
-
Save fabiobatalha/6089045 to your computer and use it in GitHub Desktop.
API for querying Crossref for the DOIs that match a given set of metadata.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# encoding: utf-8 | |
import urllib2 | |
from xml.etree import ElementTree | |
from xml.etree.ElementTree import Element | |
from urllib import urlencode | |
class FindDoi(object):
    """Build Crossref metadata queries, submit them and read the DOI back.

    Typical flow: ``get_query_batch_xml`` builds the ``query_batch`` XML
    document, ``query_to_crossref`` posts it to the Crossref query servlet,
    and ``extract_doi_from_xml`` pulls the DOI out of the response, using
    the result format chosen at construction time.
    """

    # Where the DOI element lives in each supported result format. The keys
    # double as the list of formats accepted by ``__init__``.
    _DOI_PATHS = {
        'unixref': ".//doi_data/doi",
        'unixsd': ".//{http://www.crossref.org/qrschema/3.0}doi",
        'xsd_xml': ".//{http://www.crossref.org/qrschema/2.0}doi",
    }

    def __init__(self, format='unixref'):
        """Remember the result format to request and to parse.

        Args:
            format: one of ``'unixref'``, ``'unixsd'`` or ``'xsd_xml'``.

        Raises:
            ValueError: if ``format`` is not one of the supported formats.
        """
        allowed_formats = sorted(self._DOI_PATHS)
        if format not in allowed_formats:
            raise ValueError('format %s not allowed' % format)
        self._format = format

    @staticmethod
    def _as_text(value):
        """Return ``value`` in a form ElementTree can serialize.

        ElementTree only serializes strings, so integers (a natural way to
        pass e.g. ``year=2013``) are converted; anything else is returned
        untouched.
        """
        return str(value) if isinstance(value, int) else value

    def extract_doi_from_xml(self, xml):
        """Return the first DOI found in a response document.

        Args:
            xml: the XML text of a response, in the format given to the
                constructor.

        Returns:
            The text of the DOI element, or ``None`` when the document has
            no such element. Malformed XML still raises the parser's error.
        """
        path = self._DOI_PATHS[self._format]
        try:
            # find() returns None when nothing matches, so ``.text`` raises
            # AttributeError; that is the "no DOI found" signal.
            return ElementTree.fromstring(xml).find(path).text
        except AttributeError:
            return None

    def get_query_batch_xml(self,
                            key='any',
                            email_address="[email protected]",
                            doi_batch_id="crossref",
                            issn=None,
                            journal_title=None,
                            article_title=None,
                            author=None,
                            year=None,
                            volume=None,
                            issue=None,
                            first_page=None):
        """Build a ``query_batch`` document holding a single query.

        Only the metadata fields that are given (truthy) are written to the
        query; the others are left out entirely.

        Args:
            key: value of the ``key`` attribute of the ``query`` element.
            email_address: text of ``head/email_address``.
            doi_batch_id: text of ``head/doi_batch_id``.
            issn, journal_title, author, volume, issue, first_page, year,
            article_title: optional metadata of the item being looked up.
                Integers are accepted and written as their decimal text.

        Returns:
            The document serialized as UTF-8 (no XML declaration is added
            here; ``query_to_crossref`` prepends one).
        """
        root = Element('query_batch', attrib={
            'version': '2.0',
            'xmlns': 'http://www.crossref.org/qschema/2.0',
            'xmlns:xsi': 'http://www.w3.org/2001/XMLSchema-instance',
            # Public schema location; the previous value pointed at a file
            # on the original author's machine.
            'xsi:schemaLocation': (
                'http://www.crossref.org/qschema/2.0 '
                'http://www.crossref.org/qschema/crossref_query_input2.0.xsd'
            ),
        })

        head = ElementTree.SubElement(root, 'head')
        ElementTree.SubElement(head, 'email_address').text = email_address
        ElementTree.SubElement(head, 'doi_batch_id').text = doi_batch_id

        body = ElementTree.SubElement(root, 'body')
        query = ElementTree.SubElement(body, 'query', attrib={
            'enable-multiple-hits': 'false',
            'forward-match': 'false',
            'key': key,
        })

        # (tag, attributes, value) in the order the elements are emitted.
        # journal_title used to be accepted but never written; it is placed
        # right after issn, as in Crossref's published query examples.
        fields = (
            ('issn', {'match': 'optional'}, issn),
            ('journal_title', {'match': 'optional'}, journal_title),
            ('author', {'match': 'optional'}, author),
            ('volume', {'match': 'optional'}, volume),
            ('issue', {}, issue),
            ('first_page', {'match': 'optional'}, first_page),
            ('year', {'match': 'optional'}, year),
            ('article_title', {'match': 'fuzzy'}, article_title),
        )
        for tag, attrib, value in fields:
            if value:
                element = ElementTree.SubElement(query, tag, attrib=attrib)
                element.text = self._as_text(value)

        return ElementTree.tostring(root, encoding='utf-8', method='xml')

    def query_to_crossref(self, query_xml, user=None, passwd=None):
        """POST a query document to the Crossref query servlet.

        Args:
            query_xml: the document produced by ``get_query_batch_xml``.
            user: value of the ``usr`` form field; omitted when ``None``.
            passwd: value of the ``pwd`` form field; omitted when ``None``.

        Returns:
            The raw body of the HTTP response.
        """
        data = {'format': self._format,
                'qdata': '<?xml version = "1.0" encoding="utf-8"?>%s' % query_xml}
        # Leave missing credentials out instead of letting urlencode send
        # the literal text "None" for them.
        if user is not None:
            data['usr'] = user
        if passwd is not None:
            data['pwd'] = passwd

        # NOTE(review): the credentials are sent over plain HTTP; consider
        # HTTPS if the endpoint supports it -- confirm before changing.
        req = urllib2.Request("http://doi.crossref.org/servlet/query", urlencode(data))
        response = urllib2.urlopen(req)
        try:
            return response.read()
        finally:
            # Always release the connection, even if read() fails.
            response.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment