Created
March 22, 2021 10:20
-
-
Save telatin/8c1cfdd6b84d6b59fd7891f5ba1af9b9 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# Parses a file containing a list of URLs (one GtR user per line) and prints
# a tab-separated table with the details of each user's projects
import json | |
import sys | |
import traceback | |
import urllib3 | |
import xmltodict | |
from pprint import pprint | |
from IPython import embed | |
from urllib.parse import unquote | |
def eprint(*args, **kwargs):
    """Write a message to standard error.

    Takes the same positional and keyword arguments as the built-in
    print(); only the destination stream differs.
    """
    # sys.stderr is read at call time, not at definition time.
    stream = sys.stderr
    print(*args, file=stream, **kwargs)
def getxml(url):
    """Download *url* and return its XML body parsed with xmltodict.

    Args:
        url: address to request with HTTP GET.

    Returns:
        The object produced by xmltodict.parse() for the response body, or
        None when the body cannot be parsed (the traceback is written to
        stderr in that case).

    Errors raised by the HTTP request itself are not caught here.
    """
    http = urllib3.PoolManager()
    response = http.request('GET', url)
    try:
        return xmltodict.parse(response.data)
    except Exception:
        # Previously this path fell through to `return data` with `data`
        # unbound, raising UnboundLocalError right after logging.
        eprint("Failed to parse xml from response (%s)" % traceback.format_exc())
        return None
# The file with the list of URLs is a mandatory first argument.
if len(sys.argv) < 2:
    # sys.exit() with a string prints it to stderr and exits with status 1,
    # instead of silently terminating with a success status.
    sys.exit(f"Usage: {sys.argv[0]} <file_with_GtR_person_URLs>")
def get_pi(data):
    """Return "<firstName> <surname>" of the principal investigator.

    Args:
        data: the content of 'gtr:personRole' as parsed by xmltodict:
            either one person dict or a list of person dicts. Each person
            carries 'gtr:firstName', 'gtr:surname' and
            'gtr:roles' -> 'gtr:role', which is again one role dict or a
            list of role dicts with a 'gtr:name' entry.

    Returns:
        The name of the first person holding the PRINCIPAL_INVESTIGATOR
        role, or "" (after logging "PI not found") when there is none.
    """
    def _as_list(node):
        # xmltodict gives a dict for a single element and a list for
        # repeated ones; normalise both (and None) to a list.
        if node is None:
            return []
        return node if isinstance(node, list) else [node]

    for person in _as_list(data):
        try:
            roles = _as_list(person['gtr:roles']['gtr:role'])
            if any(role['gtr:name'] == 'PRINCIPAL_INVESTIGATOR' for role in roles):
                return person['gtr:firstName'] + ' ' + person['gtr:surname']
        except (KeyError, TypeError):
            # Incomplete or unexpected record: try the next person.
            continue

    eprint("PI not found")
    return ""
def get_user(data):
    """Extract the person's identity from a parsed person overview.

    The person record is read from data['gtr:personOverview']['gtr:person']
    and looks like:

        {
          "@url": "https://gtr.ukri.org:443/person/FF0FC8ED-2B76-440A-9E29-12790BAE28BA",
          "gtr:id": "FF0FC8ED-2B76-440A-9E29-12790BAE28BA",
          "gtr:firstName": "John",
          "gtr:otherNames": "William",
          "gtr:surname": "Moreau"
        }

    Returns:
        A 4-tuple (id, surname, firstName, otherNames). otherNames is ""
        when the field is missing or empty. If the record cannot be read,
        the problem is logged and four empty strings are returned, so the
        result can always be unpacked into four values.
    """
    try:
        person = data['gtr:personOverview']['gtr:person']
        return (
            person['gtr:id'],
            person['gtr:surname'],
            person['gtr:firstName'],
            # An empty XML element is parsed as None: report it as "".
            person.get('gtr:otherNames') or "",
        )
    except (KeyError, TypeError, AttributeError) as e:
        eprint(f"Unable to retrieve USER details: <{e}>")
        # Same arity as the success path (was a 3-tuple).
        return ('', '', '', '')
# ---------------------------------------------------------------------------
# Main script: read the URL list given as first argument and print one
# tab-separated row per project of every listed person.
# ---------------------------------------------------------------------------
with open(sys.argv[1], 'r') as f:
    urls = [line.strip() for line in f]
# Ignore blank lines and '#' comments so that they are neither fetched nor
# counted in the progress total.
urls = [u for u in urls if u and not u.startswith('#')]
totUrls = len(urls)

print("AuthorUrl\tuserId\tuserName\tuserSurname\tuserOther\tprojectCount\tPI\tprojectId\tprojectTitle\tgrantStart\tgrantEnd\tgrantPounds\tincomeType")

for cnt, url in enumerate(urls, start=1):
    eprint(f"{cnt}/{totUrls}\tparsing {url}")

    # Get USER
    rawdata = getxml(url)
    if not rawdata:
        eprint(f"Skipping {url}: no usable XML document")
        continue

    # get_user returns (id, surname, firstName, otherNames): unpack in that
    # order so that the userName/userSurname columns are not swapped.
    (userId, userSurname, userName, userOther) = get_user(rawdata)

    try:
        projects = rawdata['gtr:personOverview']['gtr:projectSearchResult']['gtr:results']['gtr:projectOverview']
    except (KeyError, TypeError):
        projects = None
    if not projects:
        eprint(f"No projects listed for {url}")
        continue
    # xmltodict returns a dict (not a list) when there is a single project.
    if isinstance(projects, dict):
        projects = [projects]

    # Iterate through projects
    pCount = 0
    for project in projects:
        p = project.get('gtr:projectComposition')
        if not p:
            eprint(f"Skipping a project without composition data for {url}")
            continue
        pCount += 1

        personRoles = p.get('gtr:personRoles')
        if personRoles is None:
            # Star when inferred from user URL and not from project PI name
            PI = userName + ' ' + userSurname + '*'
        else:
            PI = get_pi(personRoles.get('gtr:personRole')) or ""

        # Get project ID and title
        projectInfo = p.get('gtr:project') or {}
        projectTitle = projectInfo.get('gtr:title', '<Unknown>')
        projectId = projectInfo.get('gtr:id', '<Unknown>')

        # Get start, end, amount; each field falls back on its own, so one
        # missing value does not blank the others.
        fund = projectInfo.get('gtr:fund') or {}
        grantStart = fund.get('gtr:start', '<Unknown>')
        grantEnd = fund.get('gtr:end', '<Unknown>')
        grantPounds = fund.get('gtr:valuePounds', 0)
        incomeType = fund.get('gtr:type', '<Unknown>')

        print(f"{url}\t{userId}\t{userName}\t{userSurname}\t{userOther}\t{pCount}\t{PI}\t{projectId}\t{projectTitle}\t{grantStart}\t{grantEnd}\t{grantPounds}\t{incomeType}")
""" | |
STUDENTSHIP | |
{ | |
"gtr:leadResearchOrganisation": { | |
"@url": "https://gtr.ukri.org:443/organisation/D1774113-D5D2-4B7C-A412-66A90FE4B96F", | |
"gtr:id": "D1774113-D5D2-4B7C-A412-66A90FE4B96F", | |
"gtr:name": "University of Cambridge" | |
}, | |
"gtr:personRoles": null, | |
"gtr:project": { | |
"@url": "https://gtr.ukri.org:443/projects?ref=studentship-2407790", | |
"gtr:id": "44405E8A-CEB9-4281-ABAE-1F2B3C6904F1", | |
"gtr:title": "Developing therapeutic monoclonal antibodies for ESKAPE pathogens", | |
"gtr:grantCategory": "Studentship", | |
"gtr:fund": { | |
"gtr:end": "2024-03-31", | |
"gtr:funder": { | |
"@url": "https://gtr.ukri.org:443/organisation/C008C651-F5B0-4859-A334-5F574AB6B57C", | |
"gtr:id": "C008C651-F5B0-4859-A334-5F574AB6B57C", | |
"gtr:name": "MRC" | |
}, | |
"gtr:start": "2020-10-01", | |
"gtr:type": "INCOME_ACTUAL", | |
"gtr:valuePounds": "0" | |
}, | |
"gtr:healthCategories": null, | |
"gtr:researchActivities": null, | |
"gtr:researchSubjects": null, | |
"gtr:researchTopics": null, | |
"gtr:rcukProgrammes": null | |
} | |
} | |
""" | |
""" | |
Project | |
{ | |
"gtr:leadResearchOrganisation": { | |
"@url": "https://gtr.ukri.org:443/organisation/6676402F-8287-464E-9141-7E4118B331E7", | |
"gtr:id": "6676402F-8287-464E-9141-7E4118B331E7", | |
"gtr:name": "Natural Environment Research Council" | |
}, | |
"gtr:personRoles": { | |
"gtr:personRole": { | |
"@url": "https://gtr.ukri.org:443/person/BB406388-0395-4344-98DD-645C13AEBD7B", | |
"gtr:id": "BB406388-0395-4344-98DD-645C13AEBD7B", | |
"gtr:firstName": "Andrew", | |
"gtr:surname": "Singer", | |
"gtr:roles": { | |
"gtr:role": { | |
"gtr:name": "PRINCIPAL_INVESTIGATOR" | |
} | |
} | |
} | |
}, | |
"gtr:project": { | |
"@url": "https://gtr.ukri.org:443/projects?ref=G0902420", | |
"gtr:id": "8363ED05-28F4-4500-9BAD-F2E166F692CA", | |
"gtr:title": "Environmental Aetiology of Diarrhoeagenic Pathogens in Children in a Developing Country Setting", | |
"gtr:grantCategory": "Research Grant", | |
"gtr:fund": { | |
"gtr:end": "2011-03-31", | |
"gtr:funder": { | |
"@url": "https://gtr.ukri.org:443/organisation/C008C651-F5B0-4859-A334-5F574AB6B57C", | |
"gtr:id": "C008C651-F5B0-4859-A334-5F574AB6B57C", | |
"gtr:name": "MRC" | |
}, | |
"gtr:start": "2010-05-01", | |
"gtr:type": "INCOME_ACTUAL", | |
"gtr:valuePounds": "39864" | |
}, | |
"gtr:healthCategories": null, | |
"gtr:researchActivities": null, | |
"gtr:researchSubjects": null, | |
"gtr:researchTopics": null, | |
"gtr:rcukProgrammes": null | |
} | |
} | |
""" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment