Created
September 7, 2012 20:41
-
-
Save neuromusic/3669439 to your computer and use it in GitHub Desktop.
Demo of a hacked-together script for creating a Mendeley collection from a set of PMIDs. Uses the Mendeley API.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/python
# Requires Python >2.7.2
# CAUTION: this script will create a new collection in your Mendeley library and add a bunch of documents to it.
import json | |
import urlparse | |
from mendeley_client import MendeleyClient | |
from Bio import Entrez, Medline | |
def update_uploaded_list(mendeley, group_id, items='5000'):
    """Return the PMIDs of the documents already in a Mendeley group.

    Lists the group's document ids, fetches the details of each document
    (one ``group_doc_details`` call per document) and collects the ``pmid``
    identifier of every document that has one.

    Args:
        mendeley: client object exposing ``group_documents(group_id, items=...)``
            and ``group_doc_details(group_id, doc_id)``.
        group_id: id of the group to inspect.
        items: value forwarded as the ``items`` argument of the listing call
            (presumably the maximum number of documents returned -- confirm
            against the client). Defaults to ``'5000'``, the value that was
            previously hard-coded.

    Returns:
        A list of PMID values, in the order the documents were listed.
    """
    docs = mendeley.group_documents(group_id, items=items)
    uploaded_PMIDs = []
    for doc_id in docs['document_ids']:
        doc_details = mendeley.group_doc_details(group_id, doc_id)
        # A document without identifiers may lack the key entirely or carry
        # an empty value; either way it simply has no PMID to report.
        identifiers = doc_details.get('identifiers') or {}
        if 'pmid' in identifiers:
            uploaded_PMIDs.append(identifiers['pmid'])
    return uploaded_PMIDs
# Consumer key/secret handed to MendeleyClient. These are placeholder
# values -- substitute your own before running.
consumer_key = 'XXXXXXXXXXXXXXXX'
consumer_secret = 'XXXXXXXXXXXXXXXX'

mendeley = MendeleyClient(consumer_key, consumer_secret)

try:
    # First try the keys saved by an earlier run.
    mendeley.load_keys()
except IOError:
    # Loading failed with IOError: obtain the required keys now and save
    # them so the next run can load them.
    mendeley.get_required_keys()
    mendeley.save_keys()
# Check whether there is already a group with 'abcd' in its name
# (case-insensitive). If several groups match, the last one listed wins.
abcd_group_id = []  # falsy sentinel: no matching group found yet
groups = mendeley.groups()
for gr in groups:
    if 'abcd' in gr['name'].lower():
        abcd_group_id = gr['id']

if not abcd_group_id:
    # create public invite-only group for ABCD
    response = mendeley.create_group(group=json.dumps({'name': 'ABCD', 'type': 'invite'}))
    # Apply the same 'error' check this script uses for its other API calls,
    # so a failed creation stops with the API's message rather than a bare
    # KeyError on 'group_id'.
    # NOTE(review): assumes create_group reports failures under an 'error'
    # key like details()/create_document() do -- confirm against the client.
    if 'error' in response:
        raise RuntimeError("Error creating group: %s" % response['error'])
    abcd_group_id = response['group_id']
# Read the ABCD connection dump from disk.
with open("abcd_connections.json", "rb") as f:
    connections = json.load(f)

# Distinct identifiers cited anywhere in the dump, in first-seen order.
PMIDs = []
ASINs = []

for cnxn_info in connections.values():
    # Connections without a 'citations' entry contribute nothing.
    if 'citations' not in cnxn_info:
        continue
    for cite in cnxn_info['citations']:
        # A citation is filed under its PMID when it has one; its ASIN is
        # only considered when there is no PMID.
        if 'PMID' in cite:
            bucket, value = PMIDs, cite['PMID']
        elif 'ASIN' in cite:
            bucket, value = ASINs, cite['ASIN']
        else:
            continue
        if value not in bucket:
            bucket.append(value)
# Upload every referenced PMID that is not yet in the Mendeley group.
#
# NOTE: print(...) with a single, already-formatted string argument produces
# the same output as the print statement under Python 2.
print("%s PMIDs referenced in the ABCD" % len(PMIDs))

print("updating the list of uploaded PMIDS...")
uploaded_PMIDs = update_uploaded_list(mendeley, abcd_group_id)
print("%s PMIDs already uploaded" % len(uploaded_PMIDs))

# Fields stripped from a document before it is submitted to the group.
# Defined once here because the list does not change between iterations.
fields_to_remove = [
    'error',
    'mendeley_url',
    'stats',
    'uuid',
    'publication_outlet',
    'categories',
    'groups',
]

# Document field -> MEDLINE tag used when falling back to PubMed.
medline_fields = (
    ('title', 'TI'),
    ('authors', 'FAU'),
    ('abstract', 'AB'),
)

for identifier in PMIDs:
    # if it exists in the mendeley group, then skip it
    if identifier in uploaded_PMIDs:
        print("skip %s: %s" % ('pmid', identifier))
        continue

    print("adding %s: %s" % ('pmid', identifier))
    document = mendeley.details(identifier, type='pmid')

    if 'error' in document:
        print("Error getting document: %s" % document['error'])
        # The original message was never interpolated and printed the
        # literal "%(identifier)s"; supply the mapping it expects.
        print("since PMID not found in Mendley, extracting basic info on %(identifier)s from pubmed"
              % {'identifier': identifier})
        # NOTE(review): this script never sets Entrez.email, which NCBI asks
        # Entrez users to provide -- consider setting it before this call.
        handle = Entrez.efetch(db="pubmed", id=identifier, rettype="medline", retmode="text")
        try:
            # Medline.parse is lazy, so consume it before closing the handle.
            records = list(Medline.parse(handle))
        finally:
            handle.close()

        if not records:
            # Nothing came back from PubMed either; there is nothing to add.
            print("Error getting document: no PubMed record for %s" % identifier)
            continue

        record = records[0]
        document = {
            'identifiers': {'pmid': identifier},
            'type': 'Journal Article',
        }
        # Copy only the tags the record actually has, so a record without
        # e.g. an abstract no longer aborts the whole run with a KeyError.
        for doc_field, medline_tag in medline_fields:
            if medline_tag in record:
                document[doc_field] = record[medline_tag]

    # strip extra fields to prep to add to group
    for field in fields_to_remove:
        if field in document:
            del document[field]

    document['group_id'] = abcd_group_id

    # add the citation to the group
    response = mendeley.create_document(document=json.dumps(document))
    if 'error' in response:
        print("Error adding document: %s" % response['error'])
    else:
        # Record the upload locally instead of re-fetching the group's
        # document list after every addition.
        uploaded_PMIDs.append(identifier)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment