avrilcoghlan · June 18, 2019 09:21
diff --git a/unichem_rest_example_get_pdbligandids_for_chemblidlist.py b/unichem_rest_example_get_pdbligandids_for_chemblidlist.py
 #!/usr/bin/env python
 # script to find out the PDB three-letter ligand id. for a ChEMBL id., using UniChem

 import argparse
 import sys
 import requests # this is used to access json files

 # True when running under Python 3 or later. The numeric major version is
 # compared because comparing the sys.version *string* with '3' is
 # lexicographic, e.g. a major version of "10" would sort before "3".
 PY3 = sys.version_info[0] >= 3

 # Make the URL-opening module available under the single name 'urllib2'
 # whichever interpreter is running the script.
 if PY3:
     import urllib.request as urllib2
 else:
     import urllib2

 # Base URL of the UniChem REST service.
 SERVER_URL = "https://www.ebi.ac.uk/unichem/rest"

 # Endpoint path that the main section passes to get_request().
 UNICHEM = "/src_compound_id"

 #====================================================================#

 def get_request(url, arg, pretty=False, timeout=30):
     """Look up the PDB ligand id. for one ChEMBL id. using the UniChem REST service.

     Args:
         url: endpoint path below SERVER_URL, e.g. "/src_compound_id".
             Leading/trailing slashes are ignored.
         arg: the ChEMBL id. to look up, e.g. "CHEMBL14249".
         pretty: unused; kept so that existing callers keep working.
         timeout: seconds to wait for the server before 'requests' gives up.

     Returns:
         The 'src_compound_id' value of the first hit (e.g. "ATP"), or the
         string "NA" if the reply holds no usable hit.

     Errors raised by 'requests' (connection failure, timeout, a reply that
     is not JSON) are not caught here and propagate to the caller.
     """
     # Strip stray slashes so that an endpoint such as "/src_compound_id" does
     # not put a double slash ("rest//src_compound_id") into the URL.
     # e.g. for ChEMBL id. CHEMBL14249 we get:
     #  https://www.ebi.ac.uk/unichem/rest/src_compound_id/CHEMBL14249/1/3
     # NOTE(review): the trailing "/1/3" presumably selects ChEMBL as the source
     # and PDB as the target database - confirm against the UniChem documentation.
     full_url = "%s/%s/%s/1/3" % (SERVER_URL.rstrip("/"), url.strip("/"), arg)

     # Without a timeout a single stalled connection would hang the whole run.
     # .json() decodes the reply into Python objects, e.g. [{'src_compound_id': 'ATP'}]
     json_results = requests.get(full_url, timeout=timeout).json()

     # A successful reply is a non-empty list of dictionaries. Anything else
     # (an empty list, or a dictionary such as an 'error' message) means "no hit".
     if isinstance(json_results, list) and json_results:
         first_hit = json_results[0] # e.g. {'src_compound_id': 'ATP'}
         if isinstance(first_hit, dict) and 'src_compound_id' in first_hit:
             return first_hit['src_compound_id']

     return "NA"

 #====================================================================#

 def read_chembl_idlist(chemblidlistfile):
     """Read ChEMBL ids from the first column of a tab-delimited text file.

     Args:
         chemblidlistfile: path of the input file. Each line holds one
             ChEMBL id., optionally followed by further tab-separated columns.

     Returns:
         A list of the first-column values, in file order, with surrounding
         whitespace removed. Lines whose first column is empty (for example
         blank lines) are skipped, so no empty id. is sent to the server later.
     """
     idlist = []

     # 'with' closes the file even if reading fails part-way through.
     with open(chemblidlistfile, "r") as inputfileObj:
         for line in inputfileObj:
             chemblid = line.rstrip().split("\t")[0].strip()
             if chemblid:
                 idlist.append(chemblid)

     return idlist

 #====================================================================#

 if __name__ == '__main__':
     # Command-line interface: one option, -e, naming the file of ChEMBL ids.
     #  usage: unichem_rest_example_get_pdbligandids_for_chemblidlist.py [-h] [-e E]
     #    -h, --help  show this help message and exit
     #    -e E        the chemblID list file
     parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
     parser.add_argument('-e', type=str, default=None, action='store', help='the chemblID list file')
     args = parser.parse_args()

     # Without an input file there is nothing to do: show the help text and stop.
     if not args.e:
         parser.print_help()
         sys.exit(1)

     # read in the list of chembl ids:
     chembl_idlist = read_chembl_idlist(args.e)

     # Print a running count, the ChEMBL id. and its PDB ligand id., one line per id.
     # Note that UNICHEM is defined at the top of the script as "/src_compound_id".
     for cnt, chembl_id in enumerate(chembl_idlist, 1):
         pdbligand_id = get_request(UNICHEM, chembl_id, True)
         print(cnt,chembl_id,pdbligand_id)

     print("FINISHED\n")

 #====================================================================#
	#!/usr/bin/env python
	# script to find out the PDB three-letter ligand id. for a ChEMBL id., using UniChem

	import argparse
	import sys
	import requests # this is used to access json files

	# True when running under Python 3 or later. The numeric major version is
	# compared because comparing the sys.version *string* with '3' is
	# lexicographic, e.g. a major version of "10" would sort before "3".
	PY3 = sys.version_info[0] >= 3

	# Make the URL-opening module available under the single name 'urllib2'
	# whichever interpreter is running the script.
	if PY3:
	    import urllib.request as urllib2
	else:
	    import urllib2

	# Base URL of the UniChem REST service.
	SERVER_URL = "https://www.ebi.ac.uk/unichem/rest"

	# Endpoint path that the main section passes to get_request().
	UNICHEM = "/src_compound_id"

	#====================================================================#

	def get_request(url, arg, pretty=False, timeout=30):
	    """Look up the PDB ligand id. for one ChEMBL id. using the UniChem REST service.

	    Args:
	        url: endpoint path below SERVER_URL, e.g. "/src_compound_id".
	            Leading/trailing slashes are ignored.
	        arg: the ChEMBL id. to look up, e.g. "CHEMBL14249".
	        pretty: unused; kept so that existing callers keep working.
	        timeout: seconds to wait for the server before 'requests' gives up.

	    Returns:
	        The 'src_compound_id' value of the first hit (e.g. "ATP"), or the
	        string "NA" if the reply holds no usable hit.

	    Errors raised by 'requests' (connection failure, timeout, a reply that
	    is not JSON) are not caught here and propagate to the caller.
	    """
	    # Strip stray slashes so that an endpoint such as "/src_compound_id" does
	    # not put a double slash ("rest//src_compound_id") into the URL.
	    # e.g. for ChEMBL id. CHEMBL14249 we get:
	    #  https://www.ebi.ac.uk/unichem/rest/src_compound_id/CHEMBL14249/1/3
	    # NOTE(review): the trailing "/1/3" presumably selects ChEMBL as the source
	    # and PDB as the target database - confirm against the UniChem documentation.
	    full_url = "%s/%s/%s/1/3" % (SERVER_URL.rstrip("/"), url.strip("/"), arg)

	    # Without a timeout a single stalled connection would hang the whole run.
	    # .json() decodes the reply into Python objects, e.g. [{'src_compound_id': 'ATP'}]
	    json_results = requests.get(full_url, timeout=timeout).json()

	    # A successful reply is a non-empty list of dictionaries. Anything else
	    # (an empty list, or a dictionary such as an 'error' message) means "no hit".
	    if isinstance(json_results, list) and json_results:
	        first_hit = json_results[0] # e.g. {'src_compound_id': 'ATP'}
	        if isinstance(first_hit, dict) and 'src_compound_id' in first_hit:
	            return first_hit['src_compound_id']

	    return "NA"

	#====================================================================#

	def read_chembl_idlist(chemblidlistfile):
	    """Read ChEMBL ids from the first column of a tab-delimited text file.

	    Args:
	        chemblidlistfile: path of the input file. Each line holds one
	            ChEMBL id., optionally followed by further tab-separated columns.

	    Returns:
	        A list of the first-column values, in file order, with surrounding
	        whitespace removed. Lines whose first column is empty (for example
	        blank lines) are skipped, so no empty id. is sent to the server later.
	    """
	    idlist = []

	    # 'with' closes the file even if reading fails part-way through.
	    with open(chemblidlistfile, "r") as inputfileObj:
	        for line in inputfileObj:
	            chemblid = line.rstrip().split("\t")[0].strip()
	            if chemblid:
	                idlist.append(chemblid)

	    return idlist

	#====================================================================#

	if __name__ == '__main__':
	    # Command-line interface: one option, -e, naming the file of ChEMBL ids.
	    #  usage: unichem_rest_example_get_pdbligandids_for_chemblidlist.py [-h] [-e E]
	    #    -h, --help  show this help message and exit
	    #    -e E        the chemblID list file
	    parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
	    parser.add_argument('-e', type=str, default=None, action='store', help='the chemblID list file')
	    args = parser.parse_args()

	    # Without an input file there is nothing to do: show the help text and stop.
	    if not args.e:
	        parser.print_help()
	        sys.exit(1)

	    # read in the list of chembl ids:
	    chembl_idlist = read_chembl_idlist(args.e)

	    # Print a running count, the ChEMBL id. and its PDB ligand id., one line per id.
	    # Note that UNICHEM is defined at the top of the script as "/src_compound_id".
	    for cnt, chembl_id in enumerate(chembl_idlist, 1):
	        pdbligand_id = get_request(UNICHEM, chembl_id, True)
	        print(cnt,chembl_id,pdbligand_id)

	    print("FINISHED\n")

	#====================================================================#