Last active
March 9, 2016 01:44
-
-
Save dela3499/f4dd07d17296002eba72 to your computer and use it in GitHub Desktop.
Get mapping from DrugBank ID to drug name
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Python 2 only (execfile). NOTE(review): read_file, write_file and json are
# used below but never imported in this script, so they presumably come from
# this utils file -- confirm.
execfile('/home/bioage/utils/utils.py')
import xmltodict  # https://github.com/martinblech/xmltodict

drugbank_xml_file = "path/to/xml/file"
drugbank_json_file = "path/to/json/file"  # save json here
drugbank_csv_file = "path/to/csv/file"  # save csv here

# Convert XML to JSON and save
drugbank_dict = xmltodict.parse(read_file(drugbank_xml_file))  # takes around 6 minutes
write_file(drugbank_json_file, json.dumps(drugbank_dict))
# Save drug id and name to CSV
def parse_drug(drug_dict):
    """Build a ``{'name': ..., 'id': ...}`` record from one parsed drug entry.

    Args:
        drug_dict: Mapping for a single drug; the 'name' and 'drugbank-id'
            keys are read.

    Returns:
        dict with two keys: 'name' (the drug name, UTF-8 encoded) and 'id'
        (the '#text' value of the drug's 'drugbank-id' entry).
    """
    return dict(
        # need to clean some non-ascii characters (around 6 drugs have this problem)
        name=str(drug_dict['name'].encode('utf-8')),
        # NOTE(review): tryfuncs is defined outside this script; presumably it
        # applies each function in order and returns the first result that
        # does not raise -- confirm.
        id=tryfuncs(
            drug_dict['drugbank-id'],
            [lambda x: x['#text'],       # id contained in dict
             lambda x: x[0]['#text']]))  # id contained in dict, itself in a list
# One row per drug with columns 'id' and 'name'. A list comprehension is used
# so the DataFrame always receives a concrete list rather than a lazy iterator.
id2name = pd.DataFrame(
    [parse_drug(drug) for drug in drugbank_dict['drugbank']['drug']])
id2name.to_csv(drugbank_csv_file, index=False)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment