Skip to content

Instantly share code, notes, and snippets.

@dela3499
Last active March 9, 2016 01:44
Show Gist options
  • Save dela3499/f4dd07d17296002eba72 to your computer and use it in GitHub Desktop.
Save dela3499/f4dd07d17296002eba72 to your computer and use it in GitHub Desktop.
Get mapping from DrugBank ID to drug name
# Load shared helpers into this namespace. execfile() is a Python 2 builtin,
# so this script targets Python 2.
# NOTE(review): read_file, write_file, tryfuncs, json and pd are used in this
# script without being imported here -- presumably utils.py provides them; confirm.
execfile('/home/bioage/utils/utils.py')
import xmltodict #https://github.com/martinblech/xmltodict
# Input/output locations (placeholders -- replace with real paths before running).
drugbank_xml_file = "path/to/xml/file"
drugbank_json_file = "path/to/json/file" # save json here
drugbank_csv_file = "path/to/csv/file" # save csv here
# Parse the whole XML file into a nested dict, then dump that dict as JSON
# so the parse does not have to be repeated.
drugbank_dict = xmltodict.parse(read_file(drugbank_xml_file)) # takes around 6 minutes
write_file(drugbank_json_file, json.dumps(drugbank_dict))
# Save drug id and name to CSV
def parse_drug(drug_dict):
    """Extract the name and DrugBank ID from one parsed drug entry.

    Args:
        drug_dict: Mapping for a single drug; it must provide the keys
            'name' and 'drugbank-id'.

    Returns:
        A dict with the keys 'name' and 'id'.
    """
    # A few names (around 6 drugs) contain non-ASCII characters, so the
    # name is UTF-8 encoded before being handed to str().
    encoded_name = str(drug_dict['name'].encode('utf-8'))

    # The ID node is either a dict holding '#text', or a list whose first
    # element is such a dict; there is one accessor per shape.
    # NOTE(review): tryfuncs is not defined in this script -- presumably it
    # returns the result of the first accessor that does not raise; confirm.
    id_accessors = [
        lambda node: node['#text'],
        lambda node: node[0]['#text'],
    ]
    drugbank_id = tryfuncs(drug_dict['drugbank-id'], id_accessors)

    return {'name': encoded_name, 'id': drugbank_id}
# Run parse_drug over every entry under drugbank -> drug, collect the
# resulting records into a table, and write it out without the row index.
id2name = pd.DataFrame(
    list(map(parse_drug, drugbank_dict['drugbank']['drug']))
)
id2name.to_csv(drugbank_csv_file, index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment