Created
September 6, 2019 15:01
-
-
Save romanegloo/20d7bf84908fcfb4ebb880952849814d to your computer and use it in GitHub Desktop.
Find corresponding MeSH terms from CUIs in evaluation datasets
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# pylint: disable=invalid-name | |
"""Reads UMNSRS datasets where CUIs are used, add corresponding MeSHes to the | |
CUIs""" | |
from pathlib import Path | |
import csv | |
from tqdm import tqdm | |
from BMET.uts_api_client import UtsClient | |
# Keep prompting until the user supplies a path to an existing file.
file_in = Path('')
while not file_in.is_file():
    user_input = input("Enter path to a csv reference file containing CUIs: ")
    file_in = Path(user_input)

# The output path is the input path with "_mesh" inserted before the suffix,
# e.g. "data/pairs.csv" -> "data/pairs_mesh.csv".
file_out = Path(file_in.with_suffix('').as_posix() + '_mesh' +
                file_in.suffix)

# Gather all the CUI codes
uts_client = UtsClient()

# Count the data rows (every line except the header) so the progress bar has
# a total. The context manager closes the handle instead of leaking it, and
# the count is clamped at zero so an empty file does not yield a negative
# total.
with open(file_in) as counting_handle:
    rows = max(sum(1 for _ in counting_handle) - 1, 0)

# The csv module requires files to be opened with newline='' so that it can
# handle line endings itself (no blank rows on Windows, correct handling of
# newlines embedded in quoted fields).
with open(file_in, newline='') as fin, \
        open(file_out, 'w', newline='') as fout:
    csv_reader = csv.DictReader(fin)
    csv_writer = csv.writer(fout, quotechar='"')

    # fieldnames is None when the input file is empty; fall back to an empty
    # header rather than failing on "None + list".
    header = list(csv_reader.fieldnames or [])
    csv_writer.writerow(header + ['MESH1', 'MESH2'])

    # Using tqdm as a context manager closes the bar even when a lookup
    # error is re-raised below.
    with tqdm(total=rows) as pbar:
        for flds in csv_reader:
            pbar.update()
            # Each record holds a pair of concepts: CUI1/TERM1 and CUI2/TERM2.
            for i in '12':
                cui = flds['CUI' + i]
                tok = flds['TERM' + i]
                try:
                    rst = uts_client.get_concept_mesh_atoms(cui, tok)
                    if rst is None:
                        # No lookup result: record the literal string 'None'.
                        flds['MESH' + i] = 'None'
                    else:
                        # Keep only the last '/'-separated segment of the
                        # first result's code.
                        # NOTE(review): presumably the code is a URI ending in
                        # the MeSH identifier -- confirm against UtsClient.
                        mesh = rst['result'][0]['code'].split('/')[-1]
                        flds['MESH' + i] = mesh
                except Exception as e:
                    # Report the offending record, then abort the run.
                    print(f"Error: {e} Cui: {cui} Rec: {flds}")
                    raise
            # The record preserves insertion order, so the values come out as
            # the original columns followed by MESH1 and MESH2, matching the
            # header written above.
            csv_writer.writerow(flds.values())

print(f"Finished writing records on {file_out}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment