Created
June 18, 2019 09:21
-
-
Save avrilcoghlan/2a2fb40973a3138a33a1f52a5f12a6f7 to your computer and use it in GitHub Desktop.
Script to retrieve the PDB ligand IDs for a list of input ChEMBL compounds
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# Script to look up the PDB three-letter ligand ID for each ChEMBL ID in a list, using UniChem
import argparse | |
import sys | |
import requests # this is used to access json files | |
# Decide which urllib flavour to import. The structured version tuple is
# compared instead of the version *string*: string comparison is
# lexicographic, so a major version such as "10" would sort before "3".
PY3 = sys.version_info[0] >= 3
if PY3:
    import urllib.request as urllib2
else:
    import urllib2

# Base URL of the UniChem REST service that get_request() queries.
SERVER_URL = "https://www.ebi.ac.uk/unichem/rest"
# Endpoint path, relative to SERVER_URL, passed to get_request() as 'url'.
UNICHEM = "/src_compound_id"
#====================================================================# | |
def get_request(url, arg, pretty=False):
    """Return the PDB ligand id. that UniChem reports for one ChEMBL id.

    Builds ``<SERVER_URL>/<url>/<arg>/1/3``, fetches it with ``requests``
    and returns the ``src_compound_id`` of the first record in the reply.
    (The trailing ``/1/3`` presumably selects UniChem source 1 = ChEMBL and
    target source 3 = PDBe -- confirm against the UniChem documentation.)

    Args:
        url: endpoint path relative to SERVER_URL, e.g. the module-level
            UNICHEM. Leading and trailing slashes are ignored.
        arg: the ChEMBL id. to look up, e.g. "CHEMBL14249".
        pretty: unused; kept so that existing callers keep working.

    Returns:
        The ligand id. as a string (e.g. "ATP"), or "NA" when the reply is
        an error object, an empty list, or not shaped like
        ``[{'src_compound_id': ...}]``.

    Raises:
        requests.exceptions.RequestException: on network failure or timeout.
        ValueError: (or a subclass) if the reply body is not valid JSON.
    """
    # Seconds to wait for UniChem; without a timeout a single stalled
    # connection would hang the whole batch of lookups.
    timeout_seconds = 30

    # Normalise the slashes so that url = "/src_compound_id" does not yield
    # ".../rest//src_compound_id/...".
    # e.g. for ChEMBL id. CHEMBL14249 we get:
    # https://www.ebi.ac.uk/unichem/rest/src_compound_id/CHEMBL14249/1/3
    full_url = "%s/%s/%s/1/3" % (SERVER_URL.rstrip("/"), url.strip("/"), arg)

    # .json() decodes the reply body into Python objects,
    # e.g. [{'src_compound_id': 'ATP'}]
    json_results = requests.get(full_url, timeout=timeout_seconds).json()

    # Only a non-empty list of records is a successful lookup; an error
    # object ({'error': ...}), an empty list, or any other shape means that
    # no ligand id. is available.
    if not isinstance(json_results, list) or not json_results:
        return "NA"
    first_record = json_results[0]  # e.g. {'src_compound_id': 'ATP'}
    if not isinstance(first_record, dict):
        return "NA"
    return first_record.get('src_compound_id', "NA")
#====================================================================# | |
def read_chembl_idlist(chemblidlistfile):
    """Read ChEMBL ids from the first column of a tab-separated text file.

    Args:
        chemblidlistfile: path of the input file, with one compound per
            line. Anything after the first tab on a line is ignored.

    Returns:
        A list of the ids in file order. Blank lines and lines whose first
        column is empty are skipped; an empty file gives an empty list.
    """
    idlist = []
    # 'with' closes the file even if reading fails part-way through.
    with open(chemblidlistfile, "r") as inputfileObj:
        for line in inputfileObj:
            chemblid = line.rstrip().split("\t")[0].strip()
            # Skip blanks: an empty id. would otherwise be passed on and
            # looked up as if it were a real compound.
            if chemblid:
                idlist.append(chemblid)
    return idlist
#====================================================================# | |
if __name__ == '__main__':
    # Command-line interface: one optional flag, -e, naming the file that
    # lists the ChEMBL ids. Running the script without it prints:
    #   usage: unichem_rest_example_get_pdbligandids_for_chemblidlist.py [-h] [-e E]
    #   optional arguments:
    #     -h, --help  show this help message and exit
    #     -e E        the chemblID list file
    cli = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
    cli.add_argument('-e', type=str, default=None, action='store', help='the chemblID list file')
    args = cli.parse_args()

    # Without an input file there is nothing to do: show the usage text and
    # exit with a non-zero status.
    if not args.e:
        cli.print_help()
        sys.exit(1)

    # Read in the list of ChEMBL ids.
    chembl_idlist = read_chembl_idlist(args.e)

    # Look up the PDB ligand id. for each ChEMBL id. and print one numbered
    # line per compound. UNICHEM is the endpoint path defined at the top of
    # the script ("/src_compound_id").
    for position, chembl_id in enumerate(chembl_idlist, 1):
        pdbligand_id = get_request(UNICHEM, chembl_id, True)
        print(position, chembl_id, pdbligand_id)
    print("FINISHED\n")
#====================================================================# | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment